diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKey.java new file mode 100644 index 0000000..438a46a --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKey.java @@ -0,0 +1,427 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.Writable; + +/** Serializes and deserializes various integer types into a sortable, + * variable-length byte array. + * + *

Integers, signed or unsigned, are sorted in their natural order. + * The serialization format is designed to succinctly represent small absolute + * values (e.g. -2 or 4), as these values are the most frequently encountered. + * Our design is similar in goals to Zig-Zag variable-length integer encoding, + * but we also ensure that the serialized bytes sort in natural integer sort + * order.

+ * + *

Serialization Format

+ * Variable-length integers omit all leading bits that are equal to the + * sign bit. This means we have a compact, single-byte representation for + * values like -1 and +1, but require more bytes to serialize values such as + * +2^30 and -2^30. + * + *

This abstract class performs serializations to/from a 64-bit long. The + * encoding uses a header byte followed by 0-8 data bytes. Each data byte is + * a byte from the serialized integer (in big endian order). The header byte + * format consists of an implicit or explicit sign bit, a type field (or fields) + * indicating the length of the serialized integer in bytes, and the most + * significant bits of the serialized integer data.

+ * + *

Operations for setting/getting the header sign bit and type fields, as + * well as manipulating Writable objects, are deferred to subclasses. This + * design allows subclasses to choose different integer widths as well as + * different signedness properties. For example, unsigned integers may be + * implemented by treating all integers as having an implicit sign bit set to + * zero. Each subclass has a JavaDoc with the full description of the header + * format used by that particular subclass.

+ * + *

Reserved Bits

+ * Clients may reserve the most significant bits in the header byte for their + * own use. If there are R reservedBits, then the most significant R bits of the + * header byte are reserved exclusively for the client and will be initialized + * to the client-specified reserved value (default 0) during serialization. The + * remaining 8-R bits store the header information. Subclasses specify the + * maximum number of reserved bits allowed, and typical maximums are 2-3 bits. + * + *

Reserved bits are often used to efficiently embed variable-length integers + * within more complex serialized data structures while preserving sort + * ordering. For example, the {@link BigDecimalRowKey} class uses two reserved + * bits to efficiently embed a variable-length integer exponent within a + * serialized BigDecimal object.

+ * + *

NULL

+ * The header byte value 0x00 is reserved for NULL. Subclasses ensure that this + * value is used for the header byte if and only if the serialized value is + * NULL. + * + *

Implicit Termination

+ * If {@link #termination} is false and the sort order is ascending, we can + * encode NULL values as a zero-length byte array. Otherwise, the header byte + * value 0x00 is used to serialize NULLs. Subclasses ensure this + * header byte is used if and only if the serialized value is NULL. + * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte. + * However, any reserved bits in the header byte will not be inverted. + * + *

Usage

+ * This is the fastest class for storing integers of any width. It performs no + * copies during serialization and deserialization, + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class AbstractVarIntRowKey extends RowKey +{ + protected static final byte NULL = (byte) 0x00; + + /* An extended length integer has a length >= 3 bytes. Thus we store + * the encoded length field with a bias of 3 so that we can pack the field + * into the minimum number of bits. + */ + protected static final int HEADER_EXT_LENGTH_BIAS = 0x3; + + /* Header type fields - set by subclass */ + private final byte HEADER_SINGLE; + private final byte HEADER_DOUBLE; + + /* Number of data bits in header for each header type - set by subclass */ + private final int HEADER_SINGLE_DATA_BITS; + private final int HEADER_DOUBLE_DATA_BITS; + private final int HEADER_EXT_DATA_BITS; + + /* Number of bits in the extended header length field - set by subclass */ + private final int HEADER_EXT_LENGTH_BITS; + + protected Writable lw; + protected int reservedBits, reservedValue; + + protected AbstractVarIntRowKey(byte headerSingle, int headerSingleDataBits, + byte headerDouble, int headerDoubleDataBits, int headerExtLengthBits, + int headerExtDataBits) + { + this.HEADER_SINGLE = headerSingle; + this.HEADER_SINGLE_DATA_BITS = headerSingleDataBits; + this.HEADER_DOUBLE = headerDouble; + this.HEADER_DOUBLE_DATA_BITS = headerDoubleDataBits; + this.HEADER_EXT_LENGTH_BITS = headerExtLengthBits; + this.HEADER_EXT_DATA_BITS = headerExtDataBits; + } + + /** Creates a writable object for serializing long integers. */ + abstract Writable createWritable(); + + /** Stores long integer x to Writable w. */ + abstract void setWritable(long x, Writable w); + + /** Loads a long integer from Writable w. */ + abstract long getWritable(Writable w); + + /** Gets the number of reserved bits in the header byte. 
*/ + public int getReservedBits() { return reservedBits; } + + /** Gets the maximum number of reserved bits. + * This is equal to the minimum number of data bits in the header, + * which is always the number of data bits in the extended length + * header type. Typical values are 2-3 bits. + */ + public int getMaxReservedBits() { return HEADER_EXT_DATA_BITS; } + + /** Sets the number of reserved bits in the header byte. Must not exceed + * the value returned by @{link getMaxReservedBits}. + * @param reservedBits number of reserved header bits + * @throws IndexOutOfBoundsException if reservedBits > the maximum number + * of reserved bits + * @return this object + */ + public AbstractVarIntRowKey setReservedBits(int reservedBits) + { + if (reservedBits > getMaxReservedBits()) + throw new IndexOutOfBoundsException("Requested " + reservedBits + + " reserved bits " + "but only " + getMaxReservedBits() + " permitted"); + this.reservedBits = reservedBits; + return this; + } + + /** Sets the reserved value used in the header byte. Values are restricted + * to the number of bits specified in {@link #setReservedBits}. Any value + * outside of this range will be automatically truncated to the number of + * permitted reserved bits. The value itself is stored in the most + * significant bits of the header byte during serialization. + * + * @param reservedValue value to place in the header byte + * @return this object + */ + public AbstractVarIntRowKey setReservedValue(int reservedValue) { + this.reservedValue = reservedValue & ((1 << reservedBits) - 1); + return this; + } + + /** Gets the reserved header value. */ + public int getReservedValue() { return reservedValue; } + + /** Gets the sign bit of a 64-bit integer x. + * @return Long integer with sign bit stored in most significant bit, + * and all other bits clear + */ + abstract long getSign(long x); + + /** Reads a byte from long x. Any bytes read past the end of the long are + * set to the sign bit. 
+ * @param byteOffset the offset of the byte to read (starting from the least + * significant byte) + */ + protected byte readByte(long x, int byteOffset) + { + if (byteOffset >= Bytes.SIZEOF_LONG) + return (byte) (getSign(x) >> Long.SIZE - 1); + return (byte) (x >> byteOffset * 8); + } + + /** Writes byte b to long x. Assumes all bits of x are initialized to the sign + * bit. Any bytes written past the end of the long have no effect. + * @param b the byte to write + * @param x the long value to write the byte to + * @param byteOffset the offset of the byte to write to (starting from the + * least significant byte) + * @return the result of writing byte b to long x + */ + protected long writeByte(byte b, long x, int byteOffset) + { + if (byteOffset >= Bytes.SIZEOF_LONG) + return x; + + /* We only encode bytes where a bit differs from the sign bit, so we OR + * in 1 bits from byte b if x has its sign bit clear, and mask out/clear + * the 0 bits from b if x has its sign bit set. The long casts are + * necessary for 64-bit shift offsets (see Java Language Spec. 15.19). + */ + if (getSign(x) != 0) + return x & ~(((long)~b & 0xff) << (byteOffset * 8)); + else + return x | (((long)b & 0xff) << (byteOffset * 8)); + } + + @Override + public int getSerializedLength(Object o) throws IOException { + if (o == null) + return terminate() ? 1 : 0; + + /* Compute the number of bits we must store in our variable-length integer + * serialization. This is the bit position + 1 of the most significant bit + * that differs from the sign bit, or zero if all bits are equal to the sign.
+ * Reference: Hacker's Delight, 5.3 "Relation to the Log Function", bitsize(x) + */ + long x = getWritable((Writable)o), + diffBits = x ^ (getSign(x) >> Long.SIZE - 1); + int numBits = Long.SIZE - Long.numberOfLeadingZeros(diffBits); + + if (numBits <= HEADER_SINGLE_DATA_BITS - reservedBits) + return 1; + else if (numBits <= HEADER_DOUBLE_DATA_BITS - reservedBits + 8) + return 2; + + /* Otherwise, x will require an extended (3-9) byte encoding. The number of + * data bytes can be computed by calculating one plus the number of + * bits rounded up to the nearest multiple of 8, after subtracting out the + * data bits that can be stored in the header. + */ + return 1 + ((numBits - HEADER_EXT_DATA_BITS + reservedBits + 7) >>> 3); + } + + /** Gets the final masked, serialized null value header byte with reserved + * bits set. + */ + protected byte getNull() { + int nullHeader = mask(NULL) & (0xff >>> reservedBits); + return (byte) (nullHeader | (reservedValue << Byte.SIZE - reservedBits)); + } + + /** Returns true if the header is for a NULL value. Assumes header is in its + * final serialized form (masked, reserved bits if any are present). + */ + protected boolean isNull(byte h) { + return (mask(h) & (0xff >>> reservedBits)) == NULL; + } + + /** Returns a header byte initialized with the specified sign bit. No masking + * or reserved bit shifts should be performed - this operation should execute + * as if reservedBits = 0 and order is ascending. + * @param sign true if the header byte stores an integer with its sign bit set + * @return header byte initialized with the specified sign bit + */ + protected abstract byte initHeader(boolean sign); + + /** Returns a header byte after performing any necessary final serialization + * operations for non-NULL headers. This is intended to prevent non-NULL + * header bytes from using the header byte reserved for NULL values. 
The + * header argument has already been shifted right by reservedBits to + * make room for the reservedValue. No masking has been performed for sort + * ordering (and no masking should be performed by this method). + * @param h header byte + * @return header byte after applying all final serialization operations + */ + protected byte serializeNonNullHeader(byte h) { return h; } + + /** Gets the number of data bits in the header byte. */ + protected int getNumHeaderDataBits(int length) { + if (length == 1) + return HEADER_SINGLE_DATA_BITS - reservedBits; + else if (length == 2) + return HEADER_DOUBLE_DATA_BITS - reservedBits; + return HEADER_EXT_DATA_BITS - reservedBits; + } + + /** Returns the final serialized header byte for a non-NULL variable-length + * integer. + * @param sign true if the sign bit of the integer is set + * @param length length of the serialized integer (in bytes) + * @param data most significant byte of integer data to be serialized + * @return serialized, masked header byte + */ + protected byte toHeader(boolean sign, int length, byte data) { + int b = initHeader(sign), + negSign = sign ? 
0 : -1; + + if (length == 1) { + b |= (~negSign & HEADER_SINGLE); + } else if (length == 2) { + b |= (negSign & HEADER_SINGLE) | (~negSign & HEADER_DOUBLE); + } else { + int encodedLength = (length - HEADER_EXT_LENGTH_BIAS) ^ ~negSign; + encodedLength &= (1 << HEADER_EXT_LENGTH_BITS) - 1; + encodedLength <<= HEADER_EXT_DATA_BITS; + b |= (negSign & (HEADER_SINGLE|HEADER_DOUBLE)) | encodedLength; + } + + data &= (1 << getNumHeaderDataBits(length)) - 1; + b = serializeNonNullHeader((byte) ((b >>> reservedBits) | data)); + b = mask((byte)b) & (0xff >>> reservedBits); + return (byte) (b | (reservedValue << Byte.SIZE - reservedBits)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + byte[] b = w.get(); + int offset = w.getOffset(); + + if (o == null) { + if (terminate()) { + b[offset] = getNull(); + RowKeyUtils.seek(w, 1); + } + return; + } + + long x = getWritable((Writable)o); + int length = getSerializedLength((Writable)o); + + b[offset] = toHeader(getSign(x) != 0, length, readByte(x, length - 1)); + for (int i = 1; i < length; i++) + b[offset + i] = mask(readByte(x, length - i - 1)); + RowKeyUtils.seek(w, length); + } + + /** Gets the sign of a header byte. The returned value will have the + * sign stored its most significant bit, and all other bits clear. Assumes the + * header byte has its mask and reserved bits, if any, removed (equivalent to + * a header byte serialized with reservedBits = 0 and order ascending). + */ + protected abstract byte getSign(byte h); + + /** Performs any initial deserialization operations on a non-NULL header byte. + * This is intended to undo any work done by @{link serializeNonNullHeader}. + * The header byte is assumed to have had its mask if, any removed (equivalent + * to a header byte serialized in ascending order). However, the header byte + * does still contains its reserved bits. 
+ */ + protected byte deserializeNonNullHeader(byte h) { return h; } + + /** Gets the length in bytes of a variable-length integer from its header. + * Assumes the header byte has its mask and reserved bits, if any, + * removed (equivalent to a header byte serialized with reservedBits = 0 + * and order ascending). + */ + protected int getVarIntLength(byte h) { + int negSign = ~getSign(h) >> Integer.SIZE - 1; + + if (((h ^ negSign) & HEADER_SINGLE) != 0) { + return 1; + } else if (((h ^ negSign) & HEADER_DOUBLE) != 0) { + return 2; + } else { + int length = ((h ^ ~negSign) >>> HEADER_EXT_DATA_BITS); + length &= (1 << HEADER_EXT_LENGTH_BITS) - 1; + return length + HEADER_EXT_LENGTH_BIAS; + } + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + byte[] b = w.get(); + int offset = w.getOffset(); + if (w.getLength() <= 0) + return; + + if (isNull(b[offset])) { + RowKeyUtils.seek(w, 1); + } else { + byte h = (byte) (deserializeNonNullHeader(mask(b[offset]))); + RowKeyUtils.seek(w, getVarIntLength((byte) (h << reservedBits))); + } + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] b = w.get(); + int offset = w.getOffset(); + if (w.getLength() <= 0) + return null; + + if (isNull(b[offset])) { + RowKeyUtils.seek(w, 1); + return null; + } + + byte h = (byte) (deserializeNonNullHeader(mask(b[offset])) << reservedBits); + int length = getVarIntLength(h); + + long x = (long)getSign(h) >> Long.SIZE - 1; + byte d = (byte) (x << getNumHeaderDataBits(length)); + d |= (byte)((h >>> reservedBits) & ((1 << getNumHeaderDataBits(length))-1)); + + x = writeByte(d, x, length - 1); + for (int i = 1; i < length; i++) + x = writeByte(mask(b[offset + i]), x, length - i - 1); + RowKeyUtils.seek(w, length); + + if (lw == null) + lw = createWritable(); + setWritable(x, lw); + return lw; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalRowKey.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalRowKey.java new file mode 100644 index 0000000..83bb6d8 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalRowKey.java @@ -0,0 +1,413 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + +/** Serializes and deserializes BigDecimal objects into a sortable + * byte array representation. + * + *

This format ensures that serialized byte values sort in the natural + * order of a {@link BigDecimal} (as ordered by {@link BigDecimal#compareTo}). + * NULL values compare less than any non-NULL value.

+ * + *

Serialization Overview

+ * A BigDecimal object is composed of a power of 10 exponent scale + * and an unscaled, arbitrary precision integer significand. The value of the + * BigDecimal is unscaled base 2 significand × 10scale. The + * significand is an arbitrary precision {@link BigInteger}, while + * the scale is a signed 32-bit int. + * + *

This encoding format converts a canonicalized BigDecimal into a + * power-of-10 adjusted exponent, and an unscaled arbitrary precision base-10 + * integer significand. As described in {@link BigDecimal#toString}, an + * adjusted exponent is equal to the scale + precision - 1, where + * precision is the number of digits in the unscaled base 10 significand + * (with trailing zeroes removed).

+ * + *

To serialize the BigDecimal, we first serialize the adjusted exponent + * combined with a few leading header bits using a subclass of + * {@link IntWritableRowKey} (header bits are packed into the adjusted exponent + * serialization format using {@link IntWritableRowKey#setReservedValue}). Then + * we serialize the base 10 significand using a BCD encoding format described + * below. The resulting byte array sorts in natural BigDecimal + * sort order.

+ * + *

Canonicalization

+ * All BigDecimal values first go through canonicalization by stripping any + * trailing zeros using the {@link BigDecimal#stripTrailingZeros} method. + * This avoids having multiple numerically equivalent byte representations, and + * also ensures that no space is wasted storing redundant trailing zeros. + * + *

Base Normalization

+ * Next we convert the arbitrary precision BigInteger significand + * to base 10, so that the scale and significand have a common base and the + * adjusted exponent can be calculated. We cannot use two as a common base + * because some powers of 10 (such as 10-1) have infinite + * binary representations. We perform the base 10 conversion on the significand + * using {@link BigInteger#toString}. We remove the leading '-' if the + * significand value is negative, and encode the resulting decimal String into + * bytes using the Binary Coded Decimal format described below. We ignore the + * significand sign bit when computing the BCD serialization because + * the significant sign bit is encoded into the header byte, as described in the + * Header section. + * + *

Zero Nibble Terminated Binary Coded Decimals

+ * We convert decimal Strings into Binary Coded Decimal by mapping + * the ASCII characters '0' … '9' to integers 1 … 10. Each ASCII + * digit is encoded into a 4-bit nibble. There are two nibbles per byte, stored + * in big-endian order. A nibble of 0 is used as a terminator to indicate the end + * of the BCD encoded string. This ensures that shorter strings that are the + * prefix of a longer string will always compare less than the longer string, as + * the terminator is a smaller value than any decimal digit.

The BCD encoding requires an extra byte of space for the terminator + * only if there are an even number of characters (and implicit termination is + * not allowed). An odd-length string does not use the least significant nibble + * of its last byte, and thus can store a zero terminator nibble without + * requiring any additional bytes.

+ * + *

Exponent

+ * The adjusted exponent is defined as scale + precision - 1, where + * precision is equal to the number of digits in the base 10 unscaled + * significand. We translate the adjusted exponent into a variable-length byte + * array using a subclass of {@link IntWritableRowKey}, with two reserved bits + * used to encode header information. + * + *

The adjusted exponent is the sum of two 32-bit integers minus one, which + * requires 33 bits of storage in the worst case. Given two reserved bits, the + * IntWritable row key format can serialize integers with up to 33 data + * bits, not including the sign bit. However, this format truncates all values + * in memory to fit into a 32-bit integer.

+ * + *

To use the more efficient serialization format employed by + * IntWritable while avoiding 32-bit truncation, the + * ExponentRowKey class subclasses IntWritableRowKey + * to use LongWritable rather than IntWritable objects + * for storing values in memory. The byte serialization format remains unchanged + * (and is slightly more efficient for 33-bit objects than the format employed by + * {@link LongWritableRowKey}).

+ * + *

Header

+ * The header encodes the type of the BigDecimal: null, negative, zero, or + * positive. These types are assigned to integer values 0-3, respectively. + * We use two reserved header bits for the header value, and serialize as part + * of the adjusted exponent using + * IntWritableRowKey.setReservedValue. If the BigDecimal is NULL + * or zero, the associated adjusted exponent is also NULL (as there is no finite + * power of 10 that can produce a value of NULL or zero) and there is no + * significand. For positive or negative BigDecimals, the adjusted exponent and + * significand are always present, and the significand is serialized after the + * adjusted exponent. + * + *

If the header is negative, then all other serialized bits except for the + * two-bit header are logically inverted. This is to preserve sort order, as + * negative numbers with larger exponents or significands should compare less + * than negative numbers with smaller exponents or significands.

+ * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) all + * serialized bytes, including the header bits in the adjusted exponent. We + * perform this negation on all serialized bytes even if we have already + * negated bytes once due to a negative header value. + * + *

Implicit Termination

+ * If {@link #termination} is false and the sort order is ascending, we + * encode NULL values as a zero-length byte array instead of the format + * specified above. We also omit the trailing terminator byte in our BCD + * representation (which is only required for even-length BCD serializations + * anyway). Implicit termination is discussed further in + * {@link RowKey}. + * + *

Usage

+ * This is the second fastest class for storing BigDecimal objects. + * Two copies are performed during serialization and three for deserialization. + * Unfortunately, as BigDecimal objects are immutable, they cannot + * be re-used during deserialization. Each deserialization must allocate a new + * BigDecimal. There is currently no available mutable + * BigDecimal equivalent. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class BigDecimalRowKey extends RowKey +{ + /* Header types */ + protected static final byte HEADER_NULL = 0x00; + protected static final byte HEADER_NEGATIVE = 0x01; + protected static final byte HEADER_ZERO = 0x02; + protected static final byte HEADER_POSITIVE = 0x03; + + /* Number of header bits */ + protected static final int HEADER_BITS = 0x2; + + protected LongWritable lw; + protected ExponentRowKey expKey; + protected byte signMask; + + public BigDecimalRowKey() { + expKey = new ExponentRowKey(); + expKey.setReservedBits(HEADER_BITS).setTermination(Termination.MUST); + } + + @Override + public RowKey setOrder(Order order) { + expKey.setOrder(order); + return super.setOrder(order); + } + + protected byte getSignMask() { return signMask; } + + protected void resetSignMask() { setSignMask((byte)0); } + + protected void setSignMask(byte signMask) { this.signMask = signMask; } + + @Override + protected byte mask(byte b) { + return (byte) (b ^ order.mask() ^ signMask); + } + + @Override + public Class getSerializedClass() { return BigDecimal.class; } + + /** Gets the length of a String if serialized in our BCD format. We require + * 1 byte for every 2 characters, rounding up. Furthermore, if the number + * of characters is even, we require an additional byte for the + * terminator nibble if terminate() is true. + */ + protected int getSerializedLength(String s) { + return (s.length() + (terminate() ? 2 : 1)) >>> 1; + } + + /** Serializes a decimal String s into packed, zero nibble-terminated BCD + * format. 
After this operation completes, the position (length) of the byte + * buffer is incremented (decremented) by the number of bytes written. + * @param s unsigned decimal string to convert to BCD + * @param w byte buffer to store the BCD bytes + */ + protected void serializeBCD(String s, ImmutableBytesWritable w) { + byte[] b = w.get(); + int offset = w.getOffset(), + strLength = s.length(), + bcdLength = getSerializedLength(s); + + for (int i = 0; i < bcdLength; i++) { + byte bcd = 0; /* initialize both nibbles to zero terminator */ + int strPos = 2 * i; + if (strPos < strLength) + bcd = (byte) (1 + Character.digit(s.charAt(strPos), 10) << 4); + if (++strPos < strLength) + bcd |= (byte) (1 + Character.digit(s.charAt(strPos), 10)); + b[offset + i] = mask(bcd); + } + + RowKeyUtils.seek(w, bcdLength); + } + + /** Converts an arbitrary precision integer to an unsigned decimal string. */ + protected String getDecimalString(BigInteger i) { + String s = i.toString(); + return i.signum() >= 0 ? s : s.substring(1); /* skip leading '-' */ + } + + @Override + public int getSerializedLength(Object o) throws IOException { + if (o == null) + return terminate() ? 
expKey.getSerializedLength(null) : 0; + + BigDecimal d = ((BigDecimal)o).stripTrailingZeros(); + BigInteger i = d.unscaledValue(); + if (i.signum() == 0) + return expKey.getSerializedLength(null); + + String s = getDecimalString(i); + if (lw == null) + lw = new LongWritable(); + lw.set((long)s.length() + -d.scale() -1L); + + return expKey.getSerializedLength(lw) + getSerializedLength(s); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + resetSignMask(); + + if (o == null) { + if (terminate()) { + expKey.setReservedValue(mask(HEADER_NULL)); + expKey.serialize(null, w); + } + return; + } + + BigDecimal d = ((BigDecimal)o).stripTrailingZeros(); + BigInteger i = d.unscaledValue(); + if (i.signum() == 0) { + expKey.setReservedValue(mask(HEADER_ZERO)); + expKey.serialize(null, w); + return; + } + + byte header = i.signum() < 0 ? HEADER_NEGATIVE : HEADER_POSITIVE; + expKey.setReservedValue(mask(header)); + + String s = getDecimalString(i); + /* Adjusted exponent = precision + scale - 1 */ + long precision = s.length(), + exp = precision + -d.scale() -1L; + if (lw == null) + lw = new LongWritable(); + lw.set(exp); + + setSignMask((byte) (i.signum() >> Integer.SIZE - 1)); + expKey.serialize(lw, w); + serializeBCD(s, w); + } + + /** Decodes a Binary Coded Decimal digit and adds it to a string. Returns + * true (and leaves string unmodified) if digit is the terminator byte. + * Returns false otherwise. + */ + protected boolean addDigit(byte bcd, StringBuilder sb) { + if (bcd == 0) + return true; + sb.append((char) ('0' + bcd - 1)); + return false; + } + + /** Converts a packed, zero nibble-terminated BCD byte array into an unsigned + * decimal String. 
+ */ + protected String deserializeBCD(ImmutableBytesWritable w) { + byte[] b = w.get(); + int offset = w.getOffset(), + len = w.getLength(), + i = 0; + + StringBuilder sb = new StringBuilder(); + while(i < len) { + byte c = mask(b[offset + i++]); + if (addDigit((byte) ((c >>> 4) & 0xf), sb) + || addDigit((byte) (c & 0xf), sb)) + break; + } + + RowKeyUtils.seek(w, i); + return sb.toString(); + } + + protected int getBCDEncodedLength(ImmutableBytesWritable w) { + byte[] b = w.get(); + int offset = w.getOffset(), + len = w.getLength(), + i = 0; + + while (i < len) { + byte c = mask(b[offset + i++]); + if (((c & 0xf0) == 0) || ((c & 0x0f) == 0)) + break; + } + + return i; + } + + /** Deserializes BigDecimal header from exponent byte. This method will set + * sign mask to -1 if header is {@link #HEADER_NEGATIVE}, 0 otherwise. + * @param b most significant byte of exponent (header byte) + * @return the BigDecimal header stored in byte b + */ + protected byte deserializeHeader(byte b) { + resetSignMask(); + byte h = (byte) ((mask(b) & 0xff) >>> Byte.SIZE - HEADER_BITS); + setSignMask((byte) (h == HEADER_NEGATIVE ? -1 : 0)); + return h; + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + if (w.getLength() <= 0) + return; + + byte b = w.get()[w.getOffset()]; + deserializeHeader(b); + expKey.skip(w); + if (expKey.isNull(b)) + return; + RowKeyUtils.seek(w, getBCDEncodedLength(w)); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] b = w.get(); + int offset = w.getOffset(); + + if (w.getLength() <= 0) + return null; + + byte h = deserializeHeader(b[offset]); + LongWritable o = (LongWritable) expKey.deserialize(w); + if (o == null) + return h == HEADER_NULL ? null : BigDecimal.ZERO; + + long exp = o.get(); + String s = deserializeBCD(w); + + int precision = s.length(), + scale = (int) (exp - precision + 1L); + + BigInteger i = new BigInteger(h == HEADER_POSITIVE ? 
s : '-' + s); + return new BigDecimal(i, -scale); + } + + private class ExponentRowKey extends IntWritableRowKey { + /* The maximum value that can be stored by IntWritableRowKey's serialization + * format (excluding the sign bit) is a 35-bit value, which is enough to + * store the 33-bit adjusted exponent + two reserved bits. We override the + * Writable methods so that a long is used to store the serialization result + * in memory, while continuing to use IntWritableRowKey's serialization + * format for byte serialization/deserialization. + */ + @Override + public Class getSerializedClass() { return LongWritable.class; } + + @Override + Writable createWritable() { return new LongWritable(); } + + @Override + void setWritable(long x, Writable w) { ((LongWritable)w).set(x); } + + @Override + long getWritable(Writable w) { return ((LongWritable)w).get(); } + + @Override + protected byte mask(byte b) { + return (byte) (b ^ order.mask() ^ getSignMask()); + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleRowKey.java new file mode 100644 index 0000000..755b85f --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.DoubleWritable; + +/** Serializes and deserializes Doubles into a sortable byte array + * representation. + * + *

The serialization and deserialization methods are identical to + * {@link DoubleWritableRowKey} after converting the DoubleWritable to/from a + * Double.

+ * + *

Usage

+ * This is the slower class for storing doubles. No copies are made when + * serializing and deserializing, but unfortunately Double objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive doubles are first passed to + * {@link Double#valueOf}, so boxed Double values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class DoubleRowKey extends DoubleWritableRowKey +{ + private DoubleWritable dw; + + @Override + public Class getSerializedClass() { return Double.class; } + + protected Object toDoubleWritable(Object o) { + if (o == null || o instanceof DoubleWritable) + return o; + if (dw == null) + dw = new DoubleWritable(); + dw.set((Double)o); + return dw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toDoubleWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toDoubleWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + DoubleWritable dw = (DoubleWritable) super.deserialize(w); + if (dw == null) + return dw; + + return Double.valueOf(dw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleWritableRowKey.java new file mode 100644 index 0000000..fa9627d --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleWritableRowKey.java @@ -0,0 +1,171 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.DoubleWritable; + +/** Serializes and deserializes DoubleWritables into a sortable byte array + * representation. + * + *

This format ensures the following total ordering of floating point values: + * NULL < Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... + * < -Double.MIN_VALUE < -0.0 < +0.0 < Double.MIN_VALUE < ... + * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN

+ * + *

Serialization Format

+ *

Floating point numbers are encoded as specified in IEEE 754. A 64-bit double + * precision float consists of a sign bit, 11-bit unsigned exponent encoded + * in offset-1023 notation, and a 52-bit significand. The format is described + * further in the + * Double Precision Floating Point Wikipedia page

+ * + *

The value of a normal float is + * -1 sign bit × 2exponent - 1023 + * × 1.significand

+ * + *

The IEEE 754 floating point format already preserves sort ordering + * for positive floating point numbers when the raw bytes are compared in + * most significant byte order. This is discussed further at + * + * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + *

+ * + *

Thus, we need only ensure that negative numbers sort in the exact + * opposite order as positive numbers (so that say, negative infinity is less + * than negative 1), and that all negative numbers compare less than any + * positive number. To accomplish this, we invert the sign bit of all floating + * point numbers, and we also invert the exponent and significand bits if the + * floating point number was negative.

+ * + *

More specifically, we first store the floating point bits into a 64-bit + * long l using {@link Double#doubleToLongBits}. This method + * collapses all NaNs into a single, canonical NaN value but otherwise leaves + * the bits unchanged. We then compute

+ * + *
+ * l ^= (l >> (Long.SIZE - 1)) | Long.MIN_VALUE
+ * 
+ * + *

which inverts the sign bit and XOR's all other bits with the sign bit + * itself. Comparing the raw bytes of l in most significant byte + * order is equivalent to performing a double precision floating point + * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't + * compare equal to anything when performing floating point comparisons).

+ * + *

Finally, we must encode NULL efficiently. Fortunately, l can + * never have all of its bits set to one (equivalent to -1 signed in two's + * complement) as this value corresponds to a NaN removed during NaN + * canonicalization. Thus, we can encode NULL as a long zero, and all non-NULL + * numbers are translated to a long as specified above and then incremented by + * 1, which is guaranteed not to cause unsigned overflow as l must + * have at least one bit set to zero.

+ * + *

The resulting long integer is then converted into a byte array by + * serializing the long one byte at a time in most significant byte order. + * All serialized values are 8 bytes in length

+ * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte. + * + *

Implicit Termination

+ * If {@link #termination} is false and the sort order is ascending, we can + * encode NULL values as a zero-length byte array instead of using the 8 byte + * encoding specified above. Implicit termination is discussed further in + * {@link RowKey}. + * + *

Usage

+ * This is the fastest class for storing doubles. It performs no object copies + * during serialization and deserialization. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class DoubleWritableRowKey extends RowKey +{ + private static final long NULL = 0; + private DoubleWritable dw; + + @Override + public Class getSerializedClass() { return DoubleWritable.class; } + + @Override + public int getSerializedLength(Object o) throws IOException { + if (o == null && !terminate()) + return 0; + return Bytes.SIZEOF_LONG; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + byte[] b = w.get(); + int offset = w.getOffset(); + long l; + + if (o == null) { + if (!terminate()) + return; + l = NULL; + } else { + l = Double.doubleToLongBits(((DoubleWritable)o).get()); + l = (l ^ ((l >> Long.SIZE - 1) | Long.MIN_VALUE)) + 1; + } + + Bytes.putLong(b, offset, l ^ order.mask()); + RowKeyUtils.seek(w, Bytes.SIZEOF_LONG); + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + if (w.getLength() <= 0) + return; + RowKeyUtils.seek(w, Bytes.SIZEOF_LONG); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] s = w.get(); + int offset = w.getOffset(); + if (w.getLength() <= 0) + return null; + + try { + long l = Bytes.toLong(s, offset) ^ order.mask(); + if (l == NULL) + return null; + + if (dw == null) + dw = new DoubleWritable(); + + l--; + l ^= (~l >> Long.SIZE - 1) | Long.MIN_VALUE; + dw.set(Double.longBitsToDouble(l)); + return dw; + } finally { + RowKeyUtils.seek(w, Bytes.SIZEOF_LONG); + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedByteArrayRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedByteArrayRowKey.java new file mode 100644 index 0000000..56171de --- /dev/null +++ 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedByteArrayRowKey.java @@ -0,0 +1,82 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.BytesWritable; + +/** + * Serialize and deserialize byte arrays into a fixed-length byte array. + *

+ * The serialization and deserialization methods are identical to + * {@link FixedBytesWritableRowKey} after converting the BytesWritable + * to/from a byte[]. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedByteArrayRowKey extends FixedBytesWritableRowKey { + + public FixedByteArrayRowKey(int length) { + super(length); + } + + @Override + public Class getSerializedClass() { + return byte[].class; + } + + protected Object toBytesWritable(Object o) { + if (o == null || o instanceof BytesWritable) + return o; + else { + final BytesWritable bw = new BytesWritable(); + final byte[] bytes = (byte[]) o; + bw.set(bytes, 0, bytes.length); + return bw; + } + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toBytesWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toBytesWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + BytesWritable bw = (BytesWritable) super.deserialize(w); + if (bw == null) { + return null; + } + else { + final byte[] result = new byte[bw.getLength()]; + System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength()); + + return result; + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedBytesWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedBytesWritableRowKey.java new file mode 100644 index 0000000..94e6c23 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedBytesWritableRowKey.java @@ -0,0 +1,106 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.BytesWritable; + +/** + * Serializes and deserializes BytesWritable into a fixed length sortable + * representation. + *

+ * TODO: this doesn't support NULL values (because they can not be distinguished + * from empty arrays). Should I explicitly check for this? + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedBytesWritableRowKey extends RowKey { + private int length; + + public FixedBytesWritableRowKey(int length) { + this.length = length; + } + + @Override + public Class getSerializedClass() { + return BytesWritable.class; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return length; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException { + byte[] bytesToWriteIn = w.get(); + int writeOffset = w.getOffset(); + + final BytesWritable bytesWritableToWrite = (BytesWritable) o; + final int srcLen = bytesWritableToWrite.getLength(); + final byte[] bytesToWrite = bytesWritableToWrite.getBytes(); + + if (srcLen != length) + throw new IllegalArgumentException( + "can only serialize byte arrays of length " + length + ", not " + srcLen); + + // apply the sort order mask + final byte[] maskedBytesToWrite = maskAll(bytesToWrite, order, 0, srcLen); + + Bytes.putBytes(bytesToWriteIn, writeOffset, maskedBytesToWrite, 0, srcLen); + RowKeyUtils.seek(w, srcLen); + } + + private byte[] maskAll(byte[] bytes, Order order, int offset, int length) { + if (order.mask() == 0) { + return bytes; // xor with zeroes has no effect anyways + } else { + final byte[] masked = new byte[bytes.length]; + for (int i = offset; i < length + offset; i++) { + masked[i] = (byte) (bytes[i] ^ order.mask()); + } + return masked; + } + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + RowKeyUtils.seek(w, length); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + int offset = w.getOffset(); + byte[] serialized = w.get(); + + final byte[] unmasked = maskAll(serialized, order, offset, length); + + RowKeyUtils.seek(w, length); + + final 
BytesWritable result = new BytesWritable(); + result.set(unmasked, offset, length); + return result; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntWritableRowKey.java new file mode 100644 index 0000000..76eb27e --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntWritableRowKey.java @@ -0,0 +1,98 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.IntWritable; + +/** Serializes and deserializes IntWritables into a sortable fixed-length + * byte array representation. + * + *

This format ensures that all integers sort in their natural order, as + * they would sort when using signed integer comparison.

+ * + *

Serialization Format

+ * All Integers are serialized to a 4-byte, fixed-width sortable byte format. + * Serialization is performed by inverting the integer sign bit and writing the + * resulting bytes to the byte array in big endian order. + * + *

NULL

+ * Like all fixed-width integer types, this class does NOT support null + * value types. If you need null support use {@link IntWritableRowKey}. + * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte. + * + *

Usage

+ * This is the fastest class for storing fixed width 32-bit ints. Use + * {@link IntWritableRowKey} for a more compact, variable-length representation + * in almost all cases. This format is only more compact if integers most + * frequently require 28 or more bits to store (including the sign bit). + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedIntWritableRowKey extends RowKey +{ + private IntWritable iw; + + @Override + public Class getSerializedClass() { return IntWritable.class; } + + @Override + public int getSerializedLength(Object o) throws IOException { + return Bytes.SIZEOF_INT; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + byte[] b = w.get(); + int offset = w.getOffset(); + + int i = ((IntWritable)o).get(); + Bytes.putInt(b, offset, i ^ Integer.MIN_VALUE ^ order.mask()); + RowKeyUtils.seek(w, Bytes.SIZEOF_INT); + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + RowKeyUtils.seek(w, Bytes.SIZEOF_INT); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + int offset = w.getOffset(); + byte[] s = w.get(); + + int i = Bytes.toInt(s, offset) ^ Integer.MIN_VALUE ^ order.mask(); + RowKeyUtils.seek(w, Bytes.SIZEOF_INT); + + if (iw == null) + iw = new IntWritable(); + iw.set(i); + return iw; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntegerRowKey.java new file mode 100644 index 0000000..998587b --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntegerRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.IntWritable; + +/** Serialize and deserialize Integer Objects into a fixed-length sortable + * byte array representation. + * + *

The serialization and deserialization methods are + * identical to {@link FixedIntWritableRowKey} after converting the IntWritable + * to/from an Integer.

+ * + *

Usage

+ * This is the slower class for storing ints. No copies are made when + * serializing and deserializing. Unfortunately Integer objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive ints are first passed to + * {@link Integer#valueOf}, so boxed Integer values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedIntegerRowKey extends FixedIntWritableRowKey +{ + private IntWritable iw; + + @Override + public Class getSerializedClass() { return Integer.class; } + + protected Object toIntWritable(Object o) { + if (o == null || o instanceof IntWritable) + return o; + if (iw == null) + iw = new IntWritable(); + iw.set((Integer)o); + return iw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toIntWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toIntWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + IntWritable iw = (IntWritable) super.deserialize(w); + if (iw == null) + return iw; + + return Integer.valueOf(iw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongRowKey.java new file mode 100644 index 0000000..8992c3a --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongRowKey.java @@ -0,0 +1,80 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; + +/** Serialize and deserialize Long Objects into a fixed-length sortable + * byte array representation. + * + *

The serialization and deserialization methods are identical to + * {@link FixedLongWritableRowKey} after converting the LongWritable to/from a + * Long.

+ * + *

Usage

+ * This is the slower class for storing longs. One copy is made when + * serializing and deserializing. Unfortunately Long objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive longs are first passed to + * {@link Long#valueOf}, so boxed Long values may be shared, reducing the + * copies on deserialization, if the valueOf method has frequent + * cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedLongRowKey extends FixedLongWritableRowKey +{ + private LongWritable lw; + + @Override + public Class getSerializedClass() { return Long.class; } + + protected Object toLongWritable(Object o) { + if (o == null || o instanceof LongWritable) + return o; + if (lw == null) + lw = new LongWritable(); + lw.set((Long)o); + return lw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toLongWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toLongWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + LongWritable lw = (LongWritable) super.deserialize(w); + if (lw == null) + return lw; + + return Long.valueOf(lw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongWritableRowKey.java new file mode 100644 index 0000000..b0bbee3 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongWritableRowKey.java @@ -0,0 +1,98 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.LongWritable; + +/** Serializes and deserializes LongWritables into a sortable + * fixed-length byte array representation. + * + *

This format ensures that all longs sort in their natural order, as + * they would sort when using signed long comparison.

+ * + *

Serialization Format

+ * All longs are serialized to an 8-byte, fixed-width sortable byte format. + * Serialization is performed by inverting the long sign bit and writing the + * resulting bytes to the byte array in big endian order. + * + *

NULL

+ * Like all fixed-width integer types, this class does NOT support null + * value types. If you need null support use {@link LongWritableRowKey}. + * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte. + * + *

Usage

+ * This is the fastest class for storing fixed width 64-bit ints. Use + * {@link LongWritableRowKey} for a more compact, variable-length representation + * in almost all cases. This format is only more compact if longs most + * frequently require 59 or more bits to store (including the sign bit). + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedLongWritableRowKey extends RowKey +{ + private LongWritable lw; + + @Override + public Class getSerializedClass() { return LongWritable.class; } + + @Override + public int getSerializedLength(Object o) throws IOException { + return Bytes.SIZEOF_LONG; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + byte[] b = w.get(); + int offset = w.getOffset(); + + long l = ((LongWritable)o).get(); + Bytes.putLong(b, offset, l ^ Long.MIN_VALUE ^ order.mask()); + RowKeyUtils.seek(w, Bytes.SIZEOF_LONG); + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + RowKeyUtils.seek(w, Bytes.SIZEOF_LONG); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + int offset = w.getOffset(); + byte[] s = w.get(); + + long l = Bytes.toLong(s, offset) ^ Long.MIN_VALUE ^ order.mask(); + RowKeyUtils.seek(w, Bytes.SIZEOF_LONG); + + if (lw == null) + lw = new LongWritable(); + lw.set(l); + return lw; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntWritableRowKey.java new file mode 100644 index 0000000..a785863 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntWritableRowKey.java @@ -0,0 +1,61 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.IntWritable; + +/** Serialize and deserialize unsigned integers into fixed-width, sortable + * byte arrays. + * + *

The serialization and deserialization method are identical to + * {@link FixedIntWritableRowKey}, except that the sign bit of the integer is + * not negated during serialization.

+ * + *

Usage

+ * This is the fastest class for storing fixed width 32-bit unsigned ints. Use + * {@link UnsignedIntWritableRowKey} for a more compact, variable-length + * representation. This format is more compact only if integers most + * frequently require 28 bits or more bits to store. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedUnsignedIntWritableRowKey extends FixedIntWritableRowKey +{ + protected IntWritable invertSign(IntWritable iw) { + iw.set(iw.get() ^ Integer.MIN_VALUE); + return iw; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + invertSign((IntWritable)o); + super.serialize(o, w); + invertSign((IntWritable)o); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + return invertSign((IntWritable) super.deserialize(w)); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntegerRowKey.java new file mode 100644 index 0000000..9a21fd1 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntegerRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.IntWritable; + +/** Serialize and deserialize Unsigned Integer Objects into a fixed-length + * sortable byte array representation. + * + *

The serialization and deserialization method are identical to + * {@link FixedUnsignedIntWritableRowKey} after converting the IntWritable + * to/from an Integer.

+ * + *

Usage

+ * This is the slower class for storing unsigned ints. Only one copy is made + * when serializing and deserializing, but unfortunately Integer objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive ints are first passed to + * {@link Integer#valueOf}, so boxed Integer values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedUnsignedIntegerRowKey extends FixedUnsignedIntWritableRowKey +{ + private IntWritable iw; + + @Override + public Class getSerializedClass() { return Integer.class; } + + protected Object toIntWritable(Object o) { + if (o == null || o instanceof IntWritable) + return o; + if (iw == null) + iw = new IntWritable(); + iw.set((Integer)o); + return iw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toIntWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toIntWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + IntWritable iw = (IntWritable) super.deserialize(w); + if (iw == null) + return iw; + + return Integer.valueOf(iw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongRowKey.java new file mode 100644 index 0000000..564a366 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; + +/** Serialize and deserialize Unsigned Long Objects into a fixed-length + * sortable byte array representation. + * + *

The serialization and deserialization methods are identical to + * {@link FixedUnsignedLongWritableRowKey} after converting the LongWritable + * to/from a Long

. + * + *

Usage

+ * This is the slower class for storing unsigned longs. One copy is made when + * serializing and deserializing. Unfortunately Long objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive longs are first passed to + * {@link Long#valueOf}, so boxed Long values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedUnsignedLongRowKey extends FixedUnsignedLongWritableRowKey +{ + private LongWritable lw; + + @Override + public Class getSerializedClass() { return Long.class; } + + protected Object toLongWritable(Object o) { + if (o == null || o instanceof LongWritable) + return o; + if (lw == null) + lw = new LongWritable(); + lw.set((Long)o); + return lw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toLongWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toLongWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + LongWritable lw = (LongWritable) super.deserialize(w); + if (lw == null) + return lw; + + return Long.valueOf(lw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongWritableRowKey.java new file mode 100644 index 0000000..a62fb88 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongWritableRowKey.java @@ -0,0 +1,61 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; + +/** Serialize and deserialize unsigned long integers into fixed-width, sortable + * byte arrays. + * + *

The serialization and deserialization method are identical to + * {@link FixedLongWritableRowKey}, except the sign bit of the long is not + * negated during serialization.

+ * + *

Usage

+ * This is the fastest class for storing fixed width 64-bit unsigned ints. Use + * {@link UnsignedLongWritableRowKey} for a more compact, variable-length + * representation. This format is more compact only if integers most frequently + * require 59 or more bits to store. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FixedUnsignedLongWritableRowKey extends FixedLongWritableRowKey +{ + protected LongWritable invertSign(LongWritable lw) { + lw.set(lw.get() ^ Long.MIN_VALUE); + return lw; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + invertSign((LongWritable)o); + super.serialize(o, w); + invertSign((LongWritable)o); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + return invertSign((LongWritable) super.deserialize(w)); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatRowKey.java new file mode 100644 index 0000000..bac9e0b --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.FloatWritable; + +/** Serializes and deserializes Floats into a sortable byte array + * representation. + * + *

The serialization and deserialization method are identical to + * {@link FloatWritableRowKey} after converting the FloatWritable to/from a + * Float.

+ * + *

Usage

+ * This is the slower class for storing floats. No copies are made when + * serializing and deserializing, but unfortunately Float objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive floats are first passed to + * {@link Float#valueOf}, so boxed Float values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FloatRowKey extends FloatWritableRowKey +{ + private FloatWritable fw; + + @Override + public Class getSerializedClass() { return Float.class; } + + protected Object toFloatWritable(Object o) { + if (o == null || o instanceof FloatWritable) + return o; + if (fw == null) + fw = new FloatWritable(); + fw.set((Float)o); + return fw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toFloatWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toFloatWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + FloatWritable fw = (FloatWritable) super.deserialize(w); + if (fw == null) + return fw; + + return Float.valueOf(fw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatWritableRowKey.java new file mode 100644 index 0000000..cbce499 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatWritableRowKey.java @@ -0,0 +1,172 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.FloatWritable; + +/** Serializes and deserializes FloatWritables into a sortable byte array + * representation. + * + *

This format ensures the following total ordering of floating point values: + * NULL < Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... + * < -Float.MIN_VALUE < -0.0 < +0.0; < Float.MIN_VALUE < ... + * < Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN

+ * + *

Serialization Format

+ *

Floating point numbers are encoded as specified in IEEE 754. A 32-bit + * single precision float consists of a sign bit, 8-bit unsigned exponent + * encoded in offset-127 notation, and a 23-bit significand. The format is + * described further in the + * Single Precision Floating Point Wikipedia page

+ * + *

The value of a normal float is + * -1 sign bit × 2exponent - 127 + * × 1.significand

+ * + *

The IEE754 floating point format already preserves sort ordering + * for positive floating point numbers when the raw bytes are compared in + * most significant byte order. This is discussed further at + * + * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + *

+ * + *

Thus, we need only ensure that negative numbers sort in the exact + * opposite order as positive numbers (so that say, negative infinity is less + * than negative 1), and that all negative numbers compare less than any + * positive number. To accomplish this, we invert the sign bit of all floating + * point numbers, and we also invert the exponent and significand bits if the + * floating point number was negative.

+ * + *

More specifically, we first store the floating point bits into a 32-bit + * int j using {@link Float#floatToIntBits}. This method + * collapses all NaNs into a single, canonical NaN value but otherwise leaves + * the bits unchanged. We then compute

+ * + *
+ * j ^= (j >> (Integer.SIZE - 1)) | Integer.MIN_VALUE
+ * 
+ * + *

which inverts the sign bit and XOR's all other bits with the sign bit + * itself. Comparing the raw bytes of j in most significant byte + * order is equivalent to performing a single precision floating point + * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't + * compare equal to anything when performing floating point comparisons).

+ * + *

Finally, we must encode NULL efficiently. Fortunately, j can + * never have all of its bits set to one (equivalent to -1 signed in two's + * complement) as this value corresponds to a NaN removed during NaN + * canonicalization. Thus, we can encode NULL as zero, and all non-NULL + * numbers are translated to an int as specified above and then incremented by + * 1, which is guaranteed not to cause unsigned overflow as j must + * have at least one bit set to zero.

+ * + *

The resulting integer is then converted into a byte array by + * serializing the integer one byte at a time in most significant byte order. + * All serialized values are 4 bytes in length

+ * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte. + * + *

Implicit Termination

+ * If {@link #termination} is false and the sort order is ascending, we can + * encode NULL values as a zero-length byte array instead of using the 8 byte + * encoding specified above. Implicit termination is discussed further in + * {@link RowKey}. + * + *

Usage

+ * This is the fastest class for storing floats. It performs no object copies + * during serialization and deserialization. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FloatWritableRowKey extends RowKey +{ + private static final int NULL = 0; + private FloatWritable fw; + + @Override + public Class getSerializedClass() { return FloatWritable.class; } + + @Override + public int getSerializedLength(Object o) throws IOException { + if (o == null && !terminate()) + return 0; + return Bytes.SIZEOF_INT; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + byte[] b = w.get(); + int offset = w.getOffset(); + int j; + + if (o == null) { + if (!terminate()) + return; + j = NULL; + } else { + j = Float.floatToIntBits(((FloatWritable)o).get()); + j = (j ^ ((j >> Integer.SIZE - 1) | Integer.MIN_VALUE)) + 1; + } + + Bytes.putInt(b, offset, j ^ order.mask()); + RowKeyUtils.seek(w, Bytes.SIZEOF_INT); + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + if (w.getLength() <= 0) + return; + RowKeyUtils.seek(w, Bytes.SIZEOF_INT); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] s = w.get(); + int offset = w.getOffset(); + if (w.getLength() <= 0) + return null; + + try { + int j = Bytes.toInt(s, offset) ^ order.mask(); + + if (j == NULL) + return null; + + if (fw == null) + fw = new FloatWritable(); + + j--; + j ^= (~j >> Integer.SIZE - 1) | Integer.MIN_VALUE; + fw.set(Float.intBitsToFloat(j)); + return fw; + } finally { + RowKeyUtils.seek(w, Bytes.SIZEOF_INT); + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntWritableRowKey.java new file mode 100644 index 0000000..74830fc --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntWritableRowKey.java @@ -0,0 
+1,161 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; + +/** Serialize and deserialize signed, two's complement integers into a + * variable-length sortable byte format. + * + *

This format ensures that serialized values will sort in their natural + * sort order, as determined by (signed) integer comparison. NULL + * values compare less than any non-NULL value.

+ * + *

Serialization Format

+ * This variable-length format is a subclass of @{link AbstractVarIntRowKey}. + * The JavaDoc page for the parent class describes the high-level design of the + * general serialization format. The basic idea is to only encode only those + * bits that have values differing from the (explicit) sign bit. + * + *

Our encoding consists of a header byte followed by 0-4 data bytes. The + * data bytes are packed 8-bit data values in big-endian order. The header byte + * contains the sign bit, the number of data bytes, and the 3-6 most significant + * bits of data.

+ * + *

The header byte contains both header fields (sign, length) and data. Some + * header length fields may be omitted in shorter-length encodings, so smaller + * encodings contain more data bits in the header. In the case of single-byte + * encodings, the header byte contains 6 bits of data. For double-byte + * encodings, the header byte contains 5 bits of data. All other + * encoding lengths contain 3 bits of data.

+ * + *

Thus we encode all numbers in two's complement using the sign bit in the + * header and 2H+D data bits, where H is the number of data bits in + * the header byte and D is the number of data bits in the data bytes + * (D = number of data bytes × 8).

+ * + *

More specifically, the numerical ranges for our variable-length byte + * encoding are: + *

+ * We support all values that can be represented in a java Integer, so N ≤ 5. + *

+ * + *

Reserved Bits

+ * Up to three of the most significant bits in the header may be reserved for + * use by the application, as three is the minimum number of data bits in the + * header byte. Reserved bits decrease the amount of data stored in the header + * byte, For example, a single byte encoding with two reserved bits can only encode + * integers in the range -16 ≤ x ≤ 15. + * + *

Full Header Format

+ * Given an integer, x: + *
+ * sign = x >> Integer.SIZE - 1
+ * negSign = ~sign
+ * 
+ * + * The full format of the header byte is + * + *
+ * Bit 7:    negSign
+ * Bit 6:    single-byte encoded ^ negSign
+ * Bit 5:    double-byte encoded ^ negSign
+ * Bits 3-4: len ^ sign (each bit XOR'd with original, unnegated sign bit)
+ * 
+ * + *

Bits 6 and 7 are used in all encodings. If bit 6 indicates a single byte + * encoding, then bits 0-5 are all data bits. Otherwise, bit 5 is used to + * indicate a double byte encoding. If a double byte encoding is used, then + * bits 0-4 are data bits. Otherwise, bits 3-4 specify the length of the + * extended length (> 2 byte) encoding. In all cases, bits 0-2 are data bits. + *

+ * + *

The len field represents the (extended) length of the encoded byte array + * minus 3, as all extended length serializations must be at least 3 bytes long. + * In other words, the encoded len field has a bias of +3, so an encoded + * field with value 1 represents a length of 4 bytes when decoded. + * The XOR's with sign and negSign are required to preserve sort ordering when + * using a big-endian byte array comparator to sort the encoded values.

+ * + *

Any padding is done with the sign bit. The worst case space overhead of + * this serialization format versus a standard fixed-length encoding is 1 additional + * byte. Note that if reserved bits are present, the above header layout is + * shifted right by the number of reserved bits.

+ * + *

Usage

+ * This is the fastest class for storing signed integers. It performs no + * copies during serialization and deserialization, + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class IntWritableRowKey extends AbstractVarIntRowKey +{ + /** Header flags */ + protected static final byte INT_SIGN = (byte) 0x80; + protected static final byte INT_SINGLE = (byte) 0x40; + protected static final byte INT_DOUBLE = (byte) 0x20; + + /** Header data bits for each header type */ + protected static final int INT_SINGLE_DATA_BITS = 0x6; + protected static final int INT_DOUBLE_DATA_BITS = 0x5; + protected static final int INT_EXT_DATA_BITS = 0x3; + + /** Extended (3-9) byte length attributes */ + /** Number of bits in the length field */ + protected static final int INT_EXT_LENGTH_BITS = 0x2; + + public IntWritableRowKey() { + super(INT_SINGLE, INT_SINGLE_DATA_BITS, INT_DOUBLE, + INT_DOUBLE_DATA_BITS, INT_EXT_LENGTH_BITS, + INT_EXT_DATA_BITS); + } + + @Override + public Class getSerializedClass() { return IntWritable.class; } + + @Override + Writable createWritable() { return new IntWritable(); } + + @Override + void setWritable(long x, Writable w) { ((IntWritable)w).set((int)x); } + + @Override + long getWritable(Writable w) { return ((IntWritable)w).get(); } + + @Override + long getSign(long l) { return l & Long.MIN_VALUE; } + + @Override + protected byte initHeader(boolean sign) { + return sign ? 0 : INT_SIGN; /* sign bit is negated in header */ + } + + @Override + protected byte getSign(byte h) { + return (h & INT_SIGN) != 0 ? 
0 : Byte.MIN_VALUE; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntegerRowKey.java new file mode 100644 index 0000000..5c54837 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntegerRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.IntWritable; + +/** Serializes and deserializes Integer objects into a variable-length + * sortable byte array representation. + * + *

The serialization and deserialization method are + * identical to {@link IntWritableRowKey} after converting the IntWritable + * to/from an Integer

. + * + *

Usage

+ * This is the slower class for storing ints. No copies are made when + * serializing and deserializing. Unfortunately Integer objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive ints are first passed to + * {@link Integer#valueOf}, so boxed Integer values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class IntegerRowKey extends IntWritableRowKey +{ + private IntWritable iw; + + @Override + public Class getSerializedClass() { return Integer.class; } + + protected Object toIntWritable(Object o) { + if (o == null || o instanceof IntWritable) + return o; + if (iw == null) + iw = new IntWritable(); + iw.set((Integer)o); + return iw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toIntWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toIntWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + IntWritable iw = (IntWritable) super.deserialize(w); + if (iw == null) + return iw; + + return Integer.valueOf(iw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LazyBigDecimalRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LazyBigDecimalRowKey.java new file mode 100644 index 0000000..6496e9a --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LazyBigDecimalRowKey.java @@ -0,0 +1,81 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; +import java.math.BigDecimal; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** Serializes and deserializes {@link BigDecimal} Objects into a sortable byte + * array representation. + * + *

This class allows BigDecimal + * objects to be lazily deserialized, so that objects are allocated only + * when required. The serialization method is identical + * to {@link BigDecimalRowKey}. The deserialization methods returns + * an ImmutableBytesWritable object containing the raw serialized + * bytes. A separate method, {@link #getBigDecimal} (identical to + * {@link BigDecimalRowKey#deserialize}) is used to fully deserialize + * this byte array lazily on demand.

+ * + *

Usage

+ * This class is potentially faster than BigDecimalRowKey as + * deserialization is performed lazily. If some values do not have to be fully + * deserialized, then the client will not pay the object allocation and parsing + * costs for these values. If all values are fully deserialized, then this class + * provides no benefits. + * + *

Two copies are made when serializing and three when fully deserializing. + * If full deserialization is not required, then no copies are performed. + * Unfortunately BigDecimal objects are immutable, and cannot be re-used across + * multiple calls to the getBigDecimal method.

+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class LazyBigDecimalRowKey extends BigDecimalRowKey +{ + private ImmutableBytesWritable rawBytes; + + @Override + public Class getDeserializedClass() { + return ImmutableBytesWritable.class; + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + if (rawBytes == null) + rawBytes = new ImmutableBytesWritable(); + + rawBytes.set(w.get(), w.getOffset(), w.getLength()); + super.skip(w); + return rawBytes; + } + + /** Gets the BigDecimal stored in the current position of the + * byte array. After this method is called, the position (length) of the byte + * array will be incremented (decremented) by the length of the serialized + * BigDecimal. + */ + public BigDecimal getBigDecimal(ImmutableBytesWritable w) throws IOException { + return (BigDecimal)super.deserialize(w); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongRowKey.java new file mode 100644 index 0000000..9365525 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongRowKey.java @@ -0,0 +1,80 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; + +/** Serializes and deserializes Long objects into a variable-length + * sortable byte array representation. + * + *

The serialization and deserialization method are identical to + * {@link LongWritableRowKey} after converting the LongWritable to/from a + * Long.

+ * + *

Usage

+ * This is the slower class for storing longs. No copies are made when + * serializing and deserializing. Unfortunately Long objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive longs are first passed to + * {@link Long#valueOf}, so boxed Long values may be shared, reducing the + * copies on deserialization, if the valueOf method has frequent + * cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class LongRowKey extends LongWritableRowKey +{ + private LongWritable lw; + + @Override + public Class getSerializedClass() { return Long.class; } + + protected Object toLongWritable(Object o) { + if (o == null || o instanceof LongWritable) + return o; + if (lw == null) + lw = new LongWritable(); + lw.set((Long)o); + return lw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toLongWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toLongWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + LongWritable lw = (LongWritable) super.deserialize(w); + if (lw == null) + return lw; + + return Long.valueOf(lw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongWritableRowKey.java new file mode 100644 index 0000000..b67f0ee --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongWritableRowKey.java @@ -0,0 +1,160 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + +/** Serialize and deserialize signed, two's complement long integers into a + * variable-length sortable byte format. + * + *

This format ensures that serialized values will sort in their natural + * sort order, as determined by (signed) long integer comparison. NULL + * values compare less than any non-NULL value.

+ * + *

Serialization Format

This variable-length format is a subclass of {@link AbstractVarIntRowKey}. + * The JavaDoc page for the parent class describes the high-level design of the + * general serialization format. The basic idea is to encode only those + * bits that have values differing from the (explicit) sign bit. + * + *

Our encoding consists of a header byte followed by 0-8 data bytes. The + * data bytes are packed 8-bit data values in big-endian order. The header byte + * contains the sign bit, the number of data bytes, and the 2-6 most significant + * bits of data.

+ * + *

The header byte contains both header fields (sign, length) and data. Some + * header length fields may be omitted in shorter-length encodings, so smaller + * encodings contain more data bits in the header. In the case of single-byte + * encodings, the header byte contains 6 bits of data. For double-byte + * encodings, the header byte contains 5 bits of data. All other + * encoding lengths contain 2 bits of data.

+ * + *

Thus we encode all numbers in two's complement using the sign bit in the + * header and 2H+D data bits, where H is the number of data bits in + * the header byte and D is the number of data bits in the data bytes + * (D = number of data bytes × 8).

+ * + *

More specifically, the numerical ranges for our variable-length byte + * encoding are: + *

+ * We support all values that can be represented in a java Long, so N ≤ 9. + *

+ * + *

Reserved Bits

Up to two of the most significant bits in the header may be reserved for use + * by the application, as two is the minimum number of data bits in the header + * byte. Reserved bits decrease the amount of data stored in the header byte. + * For example, a single byte encoding with two reserved bits can only encode + * integers in the range -16 ≤ x ≤ 15. + * + *

Full Header Format

+ * Given a long integer, x: + *
+ * sign = x >> Long.SIZE - 1
+ * negSign = ~sign
+ * 
+ * + * The full format of the header byte is + * + *
+ * Bit 7:    negSign
+ * Bit 6:    single-byte encoded ^ negSign
+ * Bit 5:    double-byte encoded ^ negSign
+ * Bits 2-4: len ^ sign (each bit XOR'd with original, unnegated sign bit)
+ * 
+ * + *

Bits 6 and 7 are used in all encodings. If bit 6 indicates a single byte + * encoding, then bits 0-5 are all data bits. Otherwise, bit 5 is used to + * indicate a double byte encoding. If a double byte encoding is used, then + * bits 0-4 are data bits. Otherwise, bits 2-4 specify the length of the + * extended length (> 2 byte) encoding. In all cases, bits 0-1 are data bits. + *

+ * + *

The len field represents the (extended) length of the encoded byte array + * minus 3, as all extended length serializations must be at least 3 bytes long. + * In other words, the encoded len field has a bias of +3, so an encoded + * field with value 1 represents a length of 4 bytes when decoded. + * The XOR's with sign and negSign are required to preserve sort ordering when + * using a big-endian byte array comparator to sort the encoded values.

+ * + *

Any padding is done with the sign bit. The worst case space overhead of + * this serialization format versus a standard fixed-length encoding is 1 additional + * byte. Note that if reserved bits are present, the above header layout is + * shifted right by the number of reserved bits.

+ * + *

Usage

+ * This is the fastest class for storing signed long integers. It performs no + * copies during serialization and deserialization, + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class LongWritableRowKey extends AbstractVarIntRowKey +{ + /** Header flags */ + protected static final byte LONG_SIGN = (byte) 0x80; + protected static final byte LONG_SINGLE = (byte) 0x40; + protected static final byte LONG_DOUBLE = (byte) 0x20; + + /** Header data bits for each header type */ + protected static final int LONG_SINGLE_DATA_BITS = 0x6; + protected static final int LONG_DOUBLE_DATA_BITS = 0x5; + protected static final int LONG_EXT_DATA_BITS = 0x2; + + /** Extended (3-9) byte length attributes */ + /** Number of bits in the length field */ + protected static final int LONG_EXT_LENGTH_BITS = 0x3; + + public LongWritableRowKey() { + super(LONG_SINGLE, LONG_SINGLE_DATA_BITS, LONG_DOUBLE, + LONG_DOUBLE_DATA_BITS, LONG_EXT_LENGTH_BITS, + LONG_EXT_DATA_BITS); + } + + @Override + public Class getSerializedClass() { return LongWritable.class; } + + @Override + Writable createWritable() { return new LongWritable(); } + + @Override + void setWritable(long x, Writable w) { ((LongWritable)w).set(x); } + + @Override + long getWritable(Writable w) { return ((LongWritable)w).get(); } + + @Override + long getSign(long l) { return l & Long.MIN_VALUE; } + + @Override + protected byte initHeader(boolean sign) { + return sign ? 0 : LONG_SIGN; /* sign bit is negated in header */ + } + + @Override + protected byte getSign(byte h) { + return (h & LONG_SIGN) != 0 ? 
0 : Byte.MIN_VALUE; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Order.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Order.java new file mode 100644 index 0000000..4e849b9 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Order.java @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** The sort order of a row key, ascending or descending. */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum Order +{ + ASCENDING((byte)0), + DESCENDING((byte)0xff); + + private final byte mask; + + Order(byte mask) { this.mask = mask; } + + /** Gets the byte mask associated with the sort order. When a + * serialized byte is XOR'd with the mask, the result is the same byte + * but sorted in the direction specified by the Order object. 
+ * @see RowKey#serialize + */ + byte mask() { return mask; } + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKey.java new file mode 100644 index 0000000..f463c88 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKey.java @@ -0,0 +1,207 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** Base class for translating objects to/from sort-order preserving byte + * arrays. + * + *

In contrast to other common object serialization methods, + * RowKey serializations use a byte array representation that + * preserves the object's natural sort ordering. Sorting the raw byte arrays + * yields the same sort order as sorting the actual objects themselves, without + * requiring the object to be instantiated. Using the serialized byte arrays + * as row keys in key-value stores such as HBase will sort rows in the natural + * sort order of the object.

+ * + *

Key types

+ * Primitive (single-value) key types are: variable-length signed/unsigned + * integers and longs, fixed-width signed/unsigned integers and longs, + * float/double, bigdecimal, and utf-8/text/String character strings. + * + *

Composite (multi-value) row key support is provided using struct row keys. + * You may have an arbitrary number of fields of any type, and each field + * may have its own sort order.

+ * + *

Order

+ * All keys may be sorted in ascending or descending order. + * + *

NULL

+ * Most keys support NULL values (only fixed-width integer/long types do not). + * All keys with NULL support treat the NULL value as comparing less than any + * non-NULL value for sort ordering purposes. + * + *

Termination

+ * Some row keys, such as character strings, require an explicit termination + * byte during serialization to indicate the end of the serialized value. + * This terminator byte can be omitted in some situations, such as during an + * ascending sort where the only serialized bytes come from the string row key. + * Omitting the explicit terminator byte is known as implicit termination, + * because the end of the serialized byte array implicitly terminates the + * serialized value. The {@link #setTermination} method can be used to + * control when termination is required. + * + *

If a row key is not forced to terminate, then during deserialization it + * will read bytes up until the end of the serialized byte array. This is safe + * if the row key serialized all of the bytes up to the end of the byte array + * (which is the common case). However, if the user has created a custom + * serialized format where their own extra bytes are appended to the byte array, + * then this would produce incorrect results and explicit termination should + * be forced.

+ * + *

The JavaDoc of each + * row key class describes the effects of implicit and explicit termination + * of the class's serialization. Note that the termination flag + * only affects serialization. For all row key types, deserialization and skip + * methods are able to detect values encoded in both implicit and explicit + * terminated formats, regardless of what the termination flag + * is set to.

+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class RowKey +{ + protected Order order; + protected Termination termination = Termination.AUTO; + private ImmutableBytesWritable w; + + public RowKey() { this.order = Order.ASCENDING; } + + /** Sets the sort order of the row key - ascending or descending. + */ + public RowKey setOrder(Order order) { this.order = order; return this; } + + /** Gets the sort order of the row key - ascending or descending */ + public Order getOrder() { return order; } + + /** Returns whether explicit termination in the serialized row key must be guaranteed + * in some fashion (such as a terminator byte or a self-describing length). + * Otherwise, the end of the byte array may serve as an implicit + * terminator. Defaults to "AUTO". + */ + public Termination getTermination() { return termination; } + + /** Sets the mustTerminate flag for this row key. Without explicit termination, + * the end of the byte array can be used to terminate encoded values. + */ + public RowKey setTermination(Termination termination) { + this.termination = termination; + return this; + } + + /** Returns true if termination is required */ + boolean terminate() { + switch (termination) { + case SHOULD_NOT: + return false; + case MUST: + return true; + case AUTO: + return order == Order.DESCENDING; + default: + throw new IllegalStateException("unknown termination " + termination); + } + } + + /** Gets the class of the object used for serialization. + * @see #serialize + */ + public abstract Class getSerializedClass(); + + /** Gets the class of the object used for deserialization. + * @see #deserialize + */ + public Class getDeserializedClass() { return getSerializedClass(); } + + /** Gets the length of the byte array when serializing an object. 
+ * @param o object to serialize + * @return the length of the byte array used to serialize o + */ + public abstract int getSerializedLength(Object o) throws IOException; + + /** Serializes an object o to a byte array. When this + * method returns, the byte array's position will be adjusted by the number + * of bytes written. The offset (length) of the byte array is incremented + * (decremented) by the number of bytes used to serialize o. + * @param o object to serialize + * @param w byte array used to store the serialized object + */ + public abstract void serialize(Object o, ImmutableBytesWritable w) + throws IOException; + + public void serialize(Object o, byte[] b) throws IOException { + serialize(o, b, 0); + } + + public void serialize(Object o, byte[] b, int offset) throws IOException { + if (w == null) + w = new ImmutableBytesWritable(); + w.set(b, offset, b.length - offset); + serialize(o, w); + } + + public byte[] serialize(Object o) throws IOException { + byte[] b = new byte[getSerializedLength(o)]; + serialize(o, b, 0); + return b; + } + + /** Skips over a serialized key in the byte array. When this + * method returns, the byte array's position will be adjusted by the number of + * bytes in the serialized key. The offset (length) of the byte array is + * incremented (decremented) by the number of bytes in the serialized key. + * @param w the byte array containing the serialized key + */ + public abstract void skip(ImmutableBytesWritable w) throws IOException; + + /** Deserializes a key from the byte array. The returned object is an + * instance of the class returned by {@link #getSerializedClass}. When this + * method returns, the byte array's position will be adjusted by the number of + * bytes in the serialized key. The offset (length) of the byte array is + * incremented (decremented) by the number of bytes in the serialized key. 
+ * @param w the byte array used for key deserialization + * @return the deserialized key from the current position in the byte array + */ + public abstract Object deserialize(ImmutableBytesWritable w) + throws IOException; + + public Object deserialize(byte[] b) throws IOException { + return deserialize(b, 0); + } + + public Object deserialize(byte[] b, int offset) throws IOException { + if (w == null) + w = new ImmutableBytesWritable(); + w.set(b, offset, b.length - offset); + return deserialize(w); + } + + /** Orders serialized byte b by XOR'ing it with the sort order mask. This + * allows descending sort orders to invert the byte values of the serialized + * byte stream. + */ + protected byte mask(byte b) { + return (byte) (b ^ order.mask()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKeyUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKeyUtils.java new file mode 100644 index 0000000..af0e603 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKeyUtils.java @@ -0,0 +1,71 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.util.Arrays; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.Text; + +/** Various utility functions for creating and manipulating row keys. */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class RowKeyUtils +{ + /** Shared (immutable) zero-length byte array singleton. */ + public static final byte[] EMPTY = new byte[0]; + + /** Converts a (byte array, offset, length) triple into a byte array, + * copying only if necessary. No copy is performed if offset is 0 and + * length is array.length. + */ + public static byte[] toBytes(byte[] b, int offset, int length) { + if (offset == 0 && length == b.length) + return b; + else if (offset == 0) + return Arrays.copyOf(b, length); + return Arrays.copyOfRange(b, offset, offset + length); + } + + /** Converts an ImmutableBytesWritable to a byte array, copying only if + * necessary. + */ + public static byte[] toBytes(ImmutableBytesWritable w) { + return toBytes(w.get(), w.getOffset(), w.getLength()); + } + + /** Converts a Text object to a byte array, copying only if + * necessary. + */ + public static byte[] toBytes(Text t) { + return toBytes(t.getBytes(), 0, t.getLength()); + } + + /** Seeks forward/backward within an ImmutableBytesWritable. After + * seek is complete, the position (length) of the byte array is + * incremented (decremented) by the seek amount. 
+ * @param w immutable byte array used for seek + * @param offset number of bytes to seek (relative to current position) + */ + public static void seek(ImmutableBytesWritable w, int offset) { + w.set(w.get(), w.getOffset() + offset, w.getLength() - offset); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StringRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StringRowKey.java new file mode 100644 index 0000000..127d1b0 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StringRowKey.java @@ -0,0 +1,71 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; + +/** Serialize and deserialize Java Strings into row keys. + * The serialization and deserialization method are identical to + * {@link UTF8RowKey} after converting the Java String to/from a UTF-8 byte + * array. + * + *

Usage

+ * This is the slowest class for storing characters and strings. One copy is + * made during serialization/deserialization, and furthermore the String + * objects themselves cannot be re-used across multiple deserializations. + * Weigh the cost of additional object instantiation + * and copying against the benefits of being able to use all of the various + * handy and tidy String functions in Java. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class StringRowKey extends UTF8RowKey +{ + @Override + public Class getSerializedClass() { return String.class; } + + protected Object toUTF8(Object o) { + if (o == null || o instanceof byte[]) + return o; + return Bytes.toBytes((String)o); + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toUTF8(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + super.serialize(toUTF8(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] b = (byte[]) super.deserialize(w); + return b == null ? b : Bytes.toString(b); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructBuilder.java new file mode 100644 index 0000000..7eb381c --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructBuilder.java @@ -0,0 +1,86 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** Builds {@link StructRowKey} objects. */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class StructBuilder +{ + protected List fields; + protected Order order; + + public StructBuilder() { + this.fields = new ArrayList(); + this.order = Order.ASCENDING; + } + + /** Adds a field row key to the struct definition. + * @param key field row key to append to the struct definition + * @return this object + */ + public StructBuilder add(RowKey key) { fields.add(key); return this; } + + /** Sets a struct field to the specified row key. Fields are numbered + * sequentially in the order they are added, starting from 0. + * @param i struct field definition index + * @param key row key assigned to field definition + * @return this object + */ + public StructBuilder set(int i, RowKey key) { + fields.set(i, key); + return this; + } + + /** Gets the field row key at field index i. */ + public RowKey get(int i) { return fields.get(i); } + + /** Gets all field row keys. */ + public List getFields() { return fields; } + + /** Sets the sort order of the struct. Default is ascending. */ + public StructBuilder setOrder(Order order) { + this.order = order; + return this; + } + + /** Gets the sort order of the struct definition. */ + public Order getOrder() { return order; } + + /** Creates a struct row key. 
*/ + public StructRowKey toRowKey() { + RowKey[] fields = this.fields.toArray(new RowKey[0]); + return (StructRowKey) new StructRowKey(fields).setOrder(order); + } + + /** Resets the struct builder. Removes all fields, sets sort order to + * ascending. + */ + public StructBuilder reset() { + fields.clear(); + order = Order.ASCENDING; + return this; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructIterator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructIterator.java new file mode 100644 index 0000000..7c08482 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructIterator.java @@ -0,0 +1,127 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** Iterates over a serialized {@link StructRowKey}. 
This iterates over each + * serialized row key field from the specified struct definition, and for each + * field allows you to deserialize the field or skip over its serialized bytes. + * In contrast, the methods provided by StructRowKey, + * {@link StructRowKey#deserialize} and {@link StructRowKey#skip}, + * deserialize or skip the entire struct at once (respectively). + * + *

A deserialized field has the same type as the field row key's deserialized + * class (specified by {@link RowKey#getDeserializedClass}). This iterator may + * also be used to deserialize bytes from any StructRowKey for + * which the specified row key is a prefix. For example, if the specified + * struct row key definition has a long and an integer field, you may parse the + * serialized output of a struct whose fields are a long, an integer, and a + * string. The iterator would return a long followed by an integer, and the + * trailing string would be ignored.

+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class StructIterator implements Iterator +{ + private StructRowKey rowKey; + private RowKey[] fields; + private int fieldPos, origOffset, origLength; + private ImmutableBytesWritable w; + + /** Creates a struct row key iterator. + * @param rowKey the struct row key type to use for deserialization + */ + public StructIterator(StructRowKey rowKey) { setRowKey(rowKey); } + + /** Creates a struct row key iterator. + * @param rowKey the struct row key type to use for deserialization + * @param bytes the serialized bytes to read from + */ + public StructIterator(StructRowKey rowKey, ImmutableBytesWritable bytes) { + setRowKey(rowKey); + setBytes(bytes); + } + + /** Creates a struct row key iterator. */ + public StructIterator() { } + + /** Sets the struct row key used for deserialization. */ + public StructIterator setRowKey(StructRowKey rowKey) { + this.rowKey = rowKey; + this.fields = rowKey.getFields(); + return this; + } + + /** Gets the struct row key used for deserialization. */ + public StructRowKey getRowKey() { return rowKey; } + + /** Sets the serialized byte array to read from. */ + public StructIterator setBytes(ImmutableBytesWritable w) { + this.w = w; + this.fieldPos = 0; + this.origOffset = w.getOffset(); + this.origLength = w.getLength(); + return this; + } + + /** Gets the serialized byte array to read from. The array offset and length + * are set to the current position. + */ + public ImmutableBytesWritable getBytes() { return w; } + + /** Resets the read position to the beginning of the serialized byte array */ + public void reset() { + this.fieldPos = 0; + if (w != null) + w.set(w.get(), origOffset, origLength); + } + + /** Skips the current field and increments read position by the number + * of bytes read. + */ + public void skip() throws IOException { fields[fieldPos++].skip(w); } + + /** Deserializes the current field and increments read position by the + * number of bytes read. 
+ */ + public Object deserialize() throws IOException { + return fields[fieldPos++].deserialize(w); + } + + public boolean hasNext() { return fieldPos < fields.length; } + + public Object next() { + try { + if (!hasNext()) + throw new NoSuchElementException(); + return deserialize(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public void remove() { throw new UnsupportedOperationException(); } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructRowKey.java new file mode 100644 index 0000000..fc8f223 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructRowKey.java @@ -0,0 +1,238 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** Serialize and deserialize a struct (record) row key into a sortable + * byte array. + * + *

A struct row key is composed of a fixed number of fields. + * Each field is a subclass of {@link RowKey} (and may even be another struct). + * The struct is sorted by its field values in the order in which the fields + * are declared.

+ * + *

Structs allow for composite row keys. This is similar to a multi-column + * primary key or index in MySQL, where the primary key is a struct and the + * columns are field row keys.

+ * + *

Serialization Format

+ * The struct row key does not serialize any additional bytes beyond those + * used to serialize each of its fields in sequential order. The struct + * serializes an array of values by simply serializing the first value using + * the first row key, followed by the second value serialized with the second + * row key, and so forth. No bytes are serialized directly by the struct class + * at all, so no additional bytes are inserted before, after, or in between + * values serialized by the field row keys. In all cases except the implicit + * termination cases mentioned below, field row keys have {@link #termination} + * set to true to force each field row key to be self-terminating. + * + *

Implicit and Explicit Termination

+ * The struct row key does not directly serialize any bytes and thus + * termination flags do not directly affect the serialization. Instead, + * the struct row controls explicit and implicit termination settings by + * manipulating the must terminate flags of the field row keys using + * {@link #setTermination}. + * + *

If mustTerminate is true, then the mustTerminate flag is set + * to true in all field row keys. However, if mustTerminate + * is false, we can set the must terminate flag to false for any field row key + * that is followed by an uninterrupted trailing suffix of ascending-sorted + * NULL values. This is because NULL values encoded via ascending sort + * are zero-length in all row key formats (assuming mustTerminate + * is false). In the extreme, a struct with must terminate set to false and all + * fields in ascending sort order would serialize a set of null objects to a + * zero-byte array. All other field row keys (those that are followed by any + * serialized value with non-zero length) will have the must terminate flag set + * to true in the field row key.

+ * + *

Struct Prefixes

+ * Given a serialized struct row key, you may deserialize fields from the + * serialized byte representation using any prefix of the struct row + * key. For example, if a struct consisting of a long, string, and float + * is serialized, then fields may be deserialized using a row key consisting of + * a long and string, or by using a row key consisting of a single + * long. Any trailing fields omitted from the prefix row key will be ignored + * during deserialization. In the above example, the (long, string) row key + * would return a two-element (long object, string object) deserialized result, + * ignoring the trailing serialized float as this was not included in the + * struct prefix's definition. + * + *

NULL

+ * Structs themselves may not be NULL. However, struct fields may be NULL + * (so long as the underlying field row key supports NULL), so you can create a + * struct where every field is NULL. + * + *

Descending sort

+ * To sort in descending order we invert the sort order of each field row key. + * + *

Usage

+ * Structs impose no extra space during serialization, or object copy overhead + * at runtime. The storage and runtime costs of a struct row key are the + * sum of the costs of each of its field row keys. + * + * @see StructIterator + * @see StructBuilder + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class StructRowKey extends RowKey implements Iterable +{ + private RowKey[] fields; + private Object[] v; + private StructIterator iterator; + private ImmutableBytesWritable iw; + + /** Creates a struct row key object. + * @param fields - the field row keys of the struct (in declaration order) + */ + public StructRowKey(RowKey[] fields) { setFields(fields); } + + @Override + public RowKey setOrder(Order order) { + if (order == getOrder()) + return this; + + super.setOrder(order); + for (RowKey field : fields) + field.setOrder(field.getOrder() == Order.ASCENDING ? Order.DESCENDING : + Order.ASCENDING); + return this; + } + + /** Sets the field row keys. + * @param fields the fields of the struct (in declaration order) + * @return this object + */ + public StructRowKey setFields(RowKey[] fields) { + this.fields = fields; + return this; + } + + /** Gets the field row keys. */ + public RowKey[] getFields() { return fields; } + + @Override + public Class getSerializedClass() { return Object[].class; } + + private Object[] toValues(Object obj) { + Object[] o = (Object[]) obj; + if (o.length != fields.length) + throw new IndexOutOfBoundsException("Expected " + fields.length + + " values but got " + o.length + " values"); + return o; + } + + /** Initializes mustTerminate in each field row key for the + * specified field values. As a side effect of this computation, the + * serialized length of the object is computed. 
+ * @param o field values + * @return the serialized length of the field values + */ + private int setTerminateAndGetLength(Object[] o) throws IOException { + int len = 0; + Termination fieldTerm = termination; + + /* We must terminate a field f if (i) mustTerminate is true for this + * struct or (ii) any field after f has a non-zero deserialized length + */ + for (int i = o.length - 1; i >= 0; i--) { + if (fieldTerm == Termination.SHOULD_NOT + // SHOULD_NOT always wins + || fields[i].getTermination() != Termination.SHOULD_NOT) + fields[i].setTermination(fieldTerm); + int objLen = fields[i].getSerializedLength(o[i]); + if (objLen > 0) { + fieldTerm = Termination.MUST; + len += objLen; + } + } + + return len; + } + + @Override + public int getSerializedLength(Object obj) throws IOException { + return setTerminateAndGetLength(toValues(obj)); + } + + @Override + public void serialize(Object obj, ImmutableBytesWritable w) + throws IOException + { + Object[] o = toValues(obj); + setTerminateAndGetLength(o); + for (int i = 0; i < o.length; i++) + fields[i].serialize(o[i], w); + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + for (int i = 0; i < fields.length; i++) + fields[i].skip(w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + if (v == null) + v = new Object[fields.length]; + for (int i = 0; i < fields.length; i++) + v[i] = fields[i].deserialize(w); + return v; + } + + /** Sets the serialized row key to iterate over. Subsequent calls to + * {@link #iterator} will iterate over this row key. 
+ * @param iw serialized row key bytes to use for iteration + * @return this object + * @see #iterator + */ + + public StructRowKey iterateOver(ImmutableBytesWritable iw) { + this.iw = iw; + return this; + } + + public StructRowKey iterateOver(byte[] b, int offset) { + if (iw == null) + iw = new ImmutableBytesWritable(); + iw.set(b, offset, b.length - offset); + return this; + } + + public StructRowKey iterateOver(byte[] b) { return iterateOver(b, 0); } + + /** Iterates over a serialized row key. Re-uses the same iterator object + * across method calls. + * @see StructIterator + * @see #iterateOver + * @return an iterator for w + */ + public StructIterator iterator() { + if (iterator == null) + iterator = new StructIterator(this); + iterator.reset(); + iterator.setBytes(iw); + return iterator; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Termination.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Termination.java new file mode 100644 index 0000000..8fc675e --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Termination.java @@ -0,0 +1,45 @@ +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + *

Termination

+ * Some row keys, such as character strings, require an explicit termination + * byte during serialization to indicate the end of the serialized value. + * This terminator byte can be omitted in some situations, such as during an + * ascending sort where the only serialized bytes come from the string row key. + * Omitting the explicit terminator byte is known as implicit termination, + * because the end of the serialized byte array implicitly terminates the + * serialized value. The {@link RowKey#setTermination} method can be used to + * control when termination is required. + * + *

If a row key is not forced to terminate, then during deserialization it + * will read bytes up until the end of the serialized byte array. This is safe + * if the row key serialized all of the bytes up to the end of the byte array + * (which is the common case). However, if the user has created a custom + * serialized format where their own extra bytes are appended to the byte array, + * then this would produce incorrect results and explicit termination should + * be forced.

+ * + *

The JavaDoc of each + * row key class describes the effects of implicit and explicit termination + * of the class's serialization. Note that the termination flag + * only affects serialization. For all row key types, deserialization and skip + * methods are able to detect values encoded in both implicit and explicit + * terminated formats, regardless of what the termination flag + * is set to.

+ * + *

There are three possible values for the mustTerminate flag: + * AUTO, MUST or SHOULD_NOT. AUTO will only use termination if really + * necessary, MUST always writes termination bytes and SHOULD_NOT never writes + * them. Using SHOULD_NOT implies you have to know what you are doing! This can + * result in ambiguous rowkeys.

+ * + *

Note that SHOULD_NOT is stronger than MUST.

+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum Termination { + AUTO, MUST, SHOULD_NOT +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/TextRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/TextRowKey.java new file mode 100644 index 0000000..94a2d93 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/TextRowKey.java @@ -0,0 +1,77 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.Text; + +/** Serialize and deserialize Hadoop Text Objects into sortable byte arrays. + * The serialization and deserialization method are identical to + * {@link UTF8RowKey} after converting the Text to/from a UTF-8 byte + * array. + * + *

Usage

+ * This is the second fastest class for storing characters and strings. No + * objetc copies are made when serializing. Unfortunately, there is no way to + * force a Text object to use an existing byte array without copying its + * contents, so one copy is required when deserializing. This class re-uses + * Text objects during deserialization, although the byte array + * backing the Text object is not re-used. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class TextRowKey extends UTF8RowKey +{ + private Text t; + + @Override + public Class getSerializedClass() { return Text.class; } + + protected Object toUTF8(Object o) { + if (o == null || o instanceof byte[]) + return o; + return RowKeyUtils.toBytes((Text)o); + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toUTF8(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toUTF8(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] b = (byte[]) super.deserialize(w); + if (b == null) + return b; + + if (t == null) + t = new Text(); + t.set(b); + return t; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UTF8RowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UTF8RowKey.java new file mode 100644 index 0000000..4753088 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UTF8RowKey.java @@ -0,0 +1,160 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +/** Serialize and deserialize UTF-8 byte arrays into a sortable byte array + * representation. + * + *

This format sorts strings in lexicographic order. The sort order ensures + * that NULL < empty string < Character.MIN_CODE_POINT < aa < aaa + * < b < ba < bb < ... < Character.MAX_CODE_POINT < ...

+ * + *

Serialization Format

+ *

The UTF-8 format already guarantees that sorting byte arrays of UTF-8 + * encoded characters is equivalent to sorting the equivalent decoded Unicode + * strings by Unicode code point. This is discussed further in the + * UTF-8 Wikipedia article. + * As a historical aside, this nifty and very useful property of UTF-8 is due to + * Ken Thompson and Rob Pike of Unix fame. UTF-8 has many other awesome + * properties, like being fully self-synchronized.

+ * + *

However, we also need to encode NULL and an end of string terminator. + * When sorting, we must ensure that NULL < terminator < any valid + * UTF-8 byte so that strings sort in the correct order. Fortunately, a simple + * solution is available to us for encoding NULL and terminator bytes. UTF-8 + * encoding will never produce the byte values 0xff or + * 0xfe. Thus, we may reserve 0x00 for NULL and + * 0x01 for terminator if we add 2 to each UTF-8 byte when + * serializing the UTF-8 byte array.

+ * + * To encode a NULL, we output 0x0 and return. Otherwise, to encode a non-NULL + * UTF-8 byte array we add 2 to each of the raw utf-8 bytes and then append the + * terminator byte at the end. Decoding is simply the reverse of the above + * operations. + * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte, + * including the null and termination bytes. + * + *

Implicit Termination

+ * If {@link #termination} is false and the sort order is ascending, we + * encode NULL values as a zero-length byte array, and omit the terminator byte + * for every string except the empty string. In this case, our format has zero + * bytes of overhead versus encoding the raw UTF-8 bytes. The end of the byte + * array serves as an implicit terminator byte. Implicit termination is + * discussed further in {@link RowKey}. + * + *

Usage

+ * This is the fastest class for storing characters and strings. + * It performs no object copies during serialization or deserialization. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UTF8RowKey extends RowKey +{ + private static final byte NULL = (byte)0x00, + TERMINATOR = (byte)0x01; + + @Override + public Class getSerializedClass() { return byte[].class; } + + @Override + public int getSerializedLength(Object o) throws IOException { + int term = terminate() ? 1 : 0; + return o == null ? term : Math.max(((byte[])o).length + term, 1); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + byte[] b = w.get(); + int offset = w.getOffset(); + + if (o == null) { + if (terminate()) { + b[offset] = mask(NULL); + RowKeyUtils.seek(w, 1); + } + return; + } + + byte[] s = (byte[]) o; + int len = s.length; + + for (int i = 0; i < len; i++) + b[offset + i] = mask((byte)(s[i] + 2)); + + boolean terminated = terminate() || len == 0; + if (terminated) + b[offset + len] = mask(TERMINATOR); + RowKeyUtils.seek(w, len + (terminated ? 
1 : 0)); + } + + protected int getUTF8RowKeyLength(ImmutableBytesWritable w) { + byte[] b = w.get(); + int offset = w.getOffset(), + len = w.getLength(); + + if (len <= 0) + return 0; + if (b[offset] == mask(NULL)) + return 1; + + int i = 0; + while (i < len && b[offset + i++] != mask(TERMINATOR)) ; + return i; + } + + @Override + public void skip(ImmutableBytesWritable w) throws IOException { + RowKeyUtils.seek(w, getUTF8RowKeyLength(w)); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + byte[] s = w.get(); + int offset = w.getOffset(); + if (w.getLength() <= 0) + return null; + + int len = getUTF8RowKeyLength(w); + try { + if (s[offset] == mask(NULL)) + return null; + if (s[offset] == mask(TERMINATOR)) + return RowKeyUtils.EMPTY; + + boolean terminated = s[offset + len - 1] == mask(TERMINATOR); + byte[] b = new byte[len - (terminated ? 1 : 0)]; + for (int i = 0; i < b.length; i++) + b[i] = (byte) (mask(s[offset + i]) - 2); + return b; + } finally { + RowKeyUtils.seek(w, len); + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedIntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedIntWritableRowKey.java new file mode 100644 index 0000000..4313849 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedIntWritableRowKey.java @@ -0,0 +1,158 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Writable; + +/** Serialize and deserialize unsigned integers into a variable-length + * sortable byte format. + * + *

This format ensures that serialized values will sort in their natural + * sort order, as determined by (unsigned) integer comparison. NULL + * values compare less than any non-NULL value. Although we are serializing + * and deserializing values with a java int, we treat the most significant bit + * of the int as a data bit, not a sign bit, because this is an unsigned data + * serialization format.

+ * + *

Serialization Format

+ * This variable-length format is a subclass of {@link AbstractVarIntRowKey}. + * The JavaDoc page for the parent class describes the high-level design of the + * general serialization format. The basic idea is to encode only those + * bits that have values differing from the implicit zero-valued sign bit + * (all unsigned integers effectively have an implied sign bit of zero). + * + *

Our encoding consists of a header byte followed by 0-4 data bytes. The + * data bytes are packed 8-bit data values in big-endian order. The header byte + * contains the number of serialized data bytes, and the 4-7 most significant + * bits of data.

+ * + *

The header byte contains both header fields (byte length) and data. Some + * header length fields may be omitted in shorter-length encodings, so smaller + * encodings contain more data bits in the header. In the case of single-byte + * encodings, the header byte contains 7 bits of data. For double-byte + * encodings, the header byte contains 6 bits of data. All other + * encoding lengths contain 4 bits of data.

+ * + *

Thus we encode all numbers using the 2H+D data bits, + * where H is the number of data bits in the header byte and D is the number of + * data bits in the data bytes (D = number of data bytes × 8).

+ * + *

More specifically, the numerical ranges for our variable-length byte + * encoding are: + *

    + *
  • One byte: -128 ≤ x ≤ 127 + *
  • Two bytes: -16384 ≤ x ≤ 16383 + *
  • N > 2 bytes: -28 × (N-1) + 4 ≤ x + * ≤ 28 × (N-1) + 4 - 1 + *
+ * We support all values that can be represented in a java Integer, so N ≤ 5. + *

+ * + *

Reserved Bits

+ * Up to four of the most significant bits in the header may be reserved for + * use by the application, as four is the minimum number of data bits in the + * header byte. Reserved bits decrease the amount of data stored in the header + * byte. For example, a single byte encoding with two reserved bits can only + * encode integers in the range -32 ≤ x ≤ 31. + * + *

Full Header Format

+ * The full format of the header byte is (note: ~ represents logical negation) + *
+ * Bit 7:    ~single-byte encoded 
+ * Bit 6:    ~double-byte encoded 
+ * Bits 4-5: len 
+ * 
+ * + *

Bit 7 is used in all encodings. If bit 7 indicates a single byte + * encodng, then bits 0-6 are all data bits. Otherwise, bit 6 is used to + * indicate a double byte encoding. If a double byte encoding is used, then + * bits 0-5 are data bits. Otherwise, bits 4-5 specify the length of the + * extended length (> 2 byte) encoding. In all cases, bits 0-3 are data bits. + *

+ * + *

The len field represents the (extended) length of the encoded byte array + * minus 3, as all extended length serializations must be at least 3 bytes long. + * In other words, the encoded len field has a bias of +3, so an encoded + * field with value 1 represents a length of 4 bytes when decoded.

+ * + *

Any padding is done with a clear (zero) bit. The worst case space overhead + * of this serialization format versus a standard fixed-length encoding is 1 + * additional byte. Note that if reserved bits are present, the above header + * layout is shifted right by the number of reserved bits.

+ * + *

Usage

+ * This is the fastest class for storing unsigned integers. It performs no + * copies during serialization and deserialization, + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnsignedIntWritableRowKey extends AbstractVarIntRowKey +{ + /** Header flags */ + protected static final byte ULONG_SINGLE = (byte) 0x80; + protected static final byte ULONG_DOUBLE = (byte) 0x40; + + /** Header data bits for each header type */ + protected static final int ULONG_SINGLE_DATA_BITS = 0x7; + protected static final int ULONG_DOUBLE_DATA_BITS = 0x6; + protected static final int ULONG_EXT_DATA_BITS = 0x4; + + /** Extended (3-9) byte length attributes */ + /** Number of bits in the length field */ + protected static final int ULONG_EXT_LENGTH_BITS = 0x2; + + public UnsignedIntWritableRowKey() { + super(ULONG_SINGLE, ULONG_SINGLE_DATA_BITS, ULONG_DOUBLE, + ULONG_DOUBLE_DATA_BITS, ULONG_EXT_LENGTH_BITS, + ULONG_EXT_DATA_BITS); + } + + @Override + public Class getSerializedClass() { return IntWritable.class; } + + @Override + Writable createWritable() { return new IntWritable(); } + + @Override + void setWritable(long x, Writable w) { ((IntWritable)w).set((int)x); } + + @Override + long getWritable(Writable w) { + int i = ((IntWritable)w).get(); + return ((long)i) & 0xffffffffL; + } + + @Override + long getSign(long l) { return 0; } + + @Override + protected byte initHeader(boolean sign) { return 0; } + + @Override + protected byte getSign(byte h) { return 0; } + + @Override + protected byte serializeNonNullHeader(byte b) { return (byte) (b + 1); } + + @Override + protected byte deserializeNonNullHeader(byte b) { return (byte) (b - 1); } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedIntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedIntegerRowKey.java new file mode 100644 index 0000000..326ff81 --- /dev/null +++ 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedIntegerRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.IntWritable; + +/** Serializes and deserializes Unsigned Integer objects into a variable-length + * sortable byte aray representation. + * + *

The serialization and deserialization methods are identical to + * {@link UnsignedIntWritableRowKey} after converting the IntWritable to/from + * an Integer

. + * + *

Usage

+ * This is the slower class for storing unsigned ints. No copies are made + * when serializing and deserializing, but unfortunately Integer objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive ints are first passed to + * {@link Integer#valueOf}, so boxed Integer values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnsignedIntegerRowKey extends UnsignedIntWritableRowKey +{ + private IntWritable iw; + + @Override + public Class getSerializedClass() { return Integer.class; } + + protected Object toIntWritable(Object o) { + if (o == null || o instanceof IntWritable) + return o; + if (iw == null) + iw = new IntWritable(); + iw.set((Integer)o); + return iw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toIntWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toIntWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + IntWritable iw = (IntWritable) super.deserialize(w); + if (iw == null) + return iw; + + return Integer.valueOf(iw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedLongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedLongRowKey.java new file mode 100644 index 0000000..82fa1ea --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedLongRowKey.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; + +/** Serializes and deserializes Unsigned Long objects into a variable-length + * sortable byte aray representation. + * + *

The serialization and deserialization methods are identical to + * {@link UnsignedLongWritableRowKey} after converting the LongWritable to/from + * a Long.

+ * + *

Usage

+ * This is the slower class for storing unsigned longs. No copies are made when + * serializing and deserializing. Unfortunately Long objects are + * immutable and thus cannot be re-used across multiple deserializations. + * However, deserialized primitive longs are first passed to + * {@link Long#valueOf}, so boxed Long values may be shared if the + * valueOf method has frequent cache hits. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnsignedLongRowKey extends UnsignedLongWritableRowKey +{ + private LongWritable lw; + + @Override + public Class getSerializedClass() { return Long.class; } + + protected Object toLongWritable(Object o) { + if (o == null || o instanceof LongWritable) + return o; + if (lw == null) + lw = new LongWritable(); + lw.set((Long)o); + return lw; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toLongWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toLongWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + LongWritable lw = (LongWritable) super.deserialize(w); + if (lw == null) + return lw; + + return Long.valueOf(lw.get()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedLongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedLongWritableRowKey.java new file mode 100644 index 0000000..35436a7 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/UnsignedLongWritableRowKey.java @@ -0,0 +1,155 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + +/** Serialize and deserialize unsigned long integers into a variable-length + * sortable byte format. + * + *

This format ensures that serialized values will sort in their natural + * sort order, as determined by (unsigned) long integer comparison. NULL + * values compare less than any non-NULL value. Although we are serializing + * and deserializing values with a java long, we treat the most significant bit + * of the long as a data bit, not a sign bit, because this is an unsigned data + * serialization format.

+ * + *

Serialization Format

+ * This variable-length format is a subclass of {@link AbstractVarIntRowKey}. + * The JavaDoc page for the parent class describes the high-level design of the + * general serialization format. The basic idea is to only encode only those + * bits that have values differing from the implicit zero-valued sign bit + * (all unsigned integers effectively have an implied sign bit of zero). + * + *

Our encoding consists of a header byte followed by 0-8 data bytes. The + * data bytes are packed 8-bit data values in big-endian order. The header byte + * contains the number of serialized data bytes, and the 3-7 most significant + * bits of data.

+ * + *

The header byte contains both header fields (byte length) and data. Some + * header length fields may be omitted in shorter-length encodings, so smaller + * encodings contain more data bits in the header. In the case of single-byte + * encodings, the header byte contains 7 bits of data. For double-byte + * encodings, the header byte contains 6 bits of data. All other + * encoding lengths contain 3 bits of data.

+ * + *

Thus we encode all numbers using the H+D data bits (2^(H+D) distinct values), + * where H is the number of data bits in the header byte and D is the number of + * data bits in the data bytes (D = number of data bytes × 8).

+ * + *

More specifically, the numerical ranges for our variable-length byte + * encoding are: + *

    + *
  • One byte: 0 ≤ x ≤ 127 + *
  • Two bytes: 0 ≤ x ≤ 16383 + *
  • N > 2 bytes: 0 ≤ x + * ≤ 2^(8 × (N-1) + 3) - 1 + *
+ * We support all values that can be represented in a java Long, so N ≤ 9. + *

+ * + *

Reserved Bits

+ * Up to three of the most significant bits in the header may be reserved for + * use by the application, as three is the minimum number of data bits in the + * header byte. Reserved bits decrease the amount of data stored in the header + * byte. For example, a single byte encoding with two reserved bits can only + * encode integers in the range 0 ≤ x ≤ 31. + * + *

Full Header Format

+ * The full format of the header byte is (note: ~ represents logical negation) + *
+ * Bit 7:    ~single-byte encoded 
+ * Bit 6:    ~double-byte encoded 
+ * Bits 3-5: len 
+ * 
+ * + *

+ * Bit 7 is used in all encodings. If bit 7 indicates a single byte + * encoding, then bits 0-6 are all data bits. Otherwise, bit 6 is used to + * indicate a double byte encoding. If a double byte encoding is used, then + * bits 0-5 are data bits. Otherwise, bits 3-5 specify the length of the + * extended length (> 2 byte) encoding. In all cases, bits 0-2 are data bits. + *

+ * + *

The len field represents the (extended) length of the encoded byte array + * minus 3, as all extended length serializations must be at least 3 bytes long. + * In other words, the encoded len field has a bias of +3, so an encoded + * field with value 1 represents a length of 4 bytes when decoded.

+ * + *

Any padding is done with a clear (zero) bit. The worst case space overhead + * of this serialization format versus a standard fixed-length encoding is 1 + * additional byte. Note that if reserved bits are present, the above header + * layout is shifted right by the number of reserved bits.

+ * + *

Usage

+ * This is the fastest class for storing unsigned long integers. It performs no + * copies during serialization and deserialization, + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class UnsignedLongWritableRowKey extends AbstractVarIntRowKey +{ + /** Header flags */ + protected static final byte ULONG_SINGLE = (byte) 0x80; + protected static final byte ULONG_DOUBLE = (byte) 0x40; + + /** Header data bits for each header type */ + protected static final int ULONG_SINGLE_DATA_BITS = 0x7; + protected static final int ULONG_DOUBLE_DATA_BITS = 0x6; + protected static final int ULONG_EXT_DATA_BITS = 0x3; + + /** Extended (3-9) byte length attributes */ + /** Number of bits in the length field */ + protected static final int ULONG_EXT_LENGTH_BITS = 0x3; + + public UnsignedLongWritableRowKey() { + super(ULONG_SINGLE, ULONG_SINGLE_DATA_BITS, ULONG_DOUBLE, + ULONG_DOUBLE_DATA_BITS, ULONG_EXT_LENGTH_BITS, + ULONG_EXT_DATA_BITS); + } + + @Override + public Class getSerializedClass() { return LongWritable.class; } + + @Override + Writable createWritable() { return new LongWritable(); } + + @Override + void setWritable(long x, Writable w) { ((LongWritable)w).set(x); } + + @Override + long getWritable(Writable w) { return ((LongWritable)w).get(); } + + @Override + long getSign(long l) { return 0; } + + @Override + protected byte initHeader(boolean sign) { return 0; } + + @Override + protected byte getSign(byte h) { return 0; } + + @Override + protected byte serializeNonNullHeader(byte b) { return (byte) (b + 1); } + + @Override + protected byte deserializeNonNullHeader(byte b) { return (byte) (b - 1); } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/VariableLengthByteArrayRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/VariableLengthByteArrayRowKey.java new file mode 100644 index 0000000..f570608 --- /dev/null +++ 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/VariableLengthByteArrayRowKey.java @@ -0,0 +1,84 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.BytesWritable; + +/** + * Serialize and deserialize byte arrays into a variable-length byte array. + *

+ * The serialization and deserialization methods are identical to + * {@link VariableLengthBytesWritableRowKey} after converting the BytesWritable + * to/from a byte[]. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class VariableLengthByteArrayRowKey extends VariableLengthBytesWritableRowKey { + + public VariableLengthByteArrayRowKey() { + } + + public VariableLengthByteArrayRowKey(int fixedPrefixLength) { + super(fixedPrefixLength); + } + + @Override + public Class getSerializedClass() { + return byte[].class; + } + + protected Object toBytesWritable(Object o) { + if (o == null || o instanceof BytesWritable) + return o; + else { + final BytesWritable bw = new BytesWritable(); + final byte[] bytes = (byte[]) o; + bw.set(bytes, 0, bytes.length); + return bw; + } + } + + @Override + public int getSerializedLength(Object o) throws IOException { + return super.getSerializedLength(toBytesWritable(o)); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException { + super.serialize(toBytesWritable(o), w); + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + BytesWritable bw = (BytesWritable) super.deserialize(w); + if (bw == null) { + return null; + } else { + final byte[] result = new byte[bw.getLength()]; + System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength()); + + return result; + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/VariableLengthBytesWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/VariableLengthBytesWritableRowKey.java new file mode 100644 index 0000000..133a500 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/VariableLengthBytesWritableRowKey.java @@ -0,0 +1,407 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.primitives.UnsignedBytes; + +/** + * Serializes and deserializes BytesWritable into a sortable variable length + * representation with an optional fixed length prefix (on which no encoding + * will be applied). + * + *

Serialization Format

+ *

+ * Variable length byte arrays can not be written simply as the original byte + * array, because this would leave us without any usable byte (or byte + * sequence) to mark the end of the byte array. We can also not simply write + * the length of the array in a fixed number of bytes in the beginning of the + * output, because this would mean the rowkeys can no longer be sorted and + * "prefix searched". This impacts things like splitting tables over regions + * etc. + *

+ * Therefore we use a variant of packed binary coded decimal (BCD). We start by + * interpreting the byte array as an unsigned variable length integer number + * represented in decimal format (using only digits 0 - 9). Packed BCD encodes + * each digit (0 - 9) into a 4 bit nibble (binary 0000 - 1001) and "packs" two + * of these nibbles into one byte. + *

+ * BCD only uses the byte range (binary) 00000000 - 10011001. This results in + * an uneven distribution of values over the whole possible range, resulting + * in problems with splitting tables over regions based on predefined byte + * ranges for each region. To solve this, we modified BCD with a mapping from + * digits to 4 bit values which uses the range more evenly. Furthermore, we + * reserve a number of small nibbles for special purposes. + *

+ * The modified mapping is: + *

+ * digit 0 = binary 0011 (0x03)
+ * digit 1 = binary 0100 (0x04)
+ * digit 2 = binary 0101 (0x05)
+ * digit 3 = binary 0110 (0x06)
+ * digit 4 = binary 0111 (0x07)
+ * digit 5 = binary 1001 (0x09)
+ * digit 6 = binary 1010 (0x0A)
+ * digit 7 = binary 1100 (0X0C)
+ * digit 8 = binary 1110 (0x0E)
+ * digit 9 = binary 1111 (0x0F)
+ * 
+ * + * This also means that a number of values are "available" as terminator, + * filler and to encode NULL values. We've chosen the nibble 0000 for NULL + * values and the nibble 0001 as terminator. This ensures that shorter byte + * arrays that are the prefix of a longer byte array will always compare less + * than the longer byte array, as the terminator is a smaller value than any + * decimal digit. + *

+ * Furthermore, we also want that if we encode two byte arrays of the same + * length, that the resulting encoded byte arrays are also of the same length + * and that their byte order is the same as the byte order of the original byte + * arrays. Also, when encoding two byte arrays of different lengths, the byte + * order of the encoded byte arrays should be the same as of the original byte + * arrays. To accomplish this, we use a filler character such that all encoding + * is always using the same number of encoded bytes per original byte (i.e. 3 + * nibbles per original byte, so a loss of 50%). The filler nibble should be + * smaller than any real encoded value, but bigger than the nibbles used for + * zero and NULL. + *

+ * To encode a NULL, we output 0x00 and return. Otherwise, to encode a non-NULL + * byte array we BCD encode the byte array and then append the + * terminator nibble at the end. Decoding is simply the reverse of the above + * operations. + *

+ * The fixed length prefix will be written as is before the variable length + * part. This can be useful to prevent BCD encoding on a fixed part (which you + * know will always be there anyways). + * + *

Descending sort

+ * To sort in descending order we perform the same encodings as in ascending + * sort, except we logically invert (take the 1's complement of) each byte, + * including the null and termination bytes. + * + *

Implicit Termination

+ * If {@link #termination} is false and the sort order is ascending, we + * encode NULL values as a zero-length byte array, and omit the terminator byte + * for every byte array except the empty byte array. Implicit termination is + * discussed further in {@link RowKey}. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class VariableLengthBytesWritableRowKey extends RowKey { + + private static final byte NULL = 0x00; + private static final byte TERMINATOR_NIBBLE = 0x01; + private static final byte TWO_TERMINATOR_NIBBLES = 0x11; + private static final char FILLER = 'f'; // whatever, just not a 0-9 digit + private static final byte FILLER_NIBBLE = 0x02; + + private final int fixedPrefixLength; + + public VariableLengthBytesWritableRowKey() { + // no fixed part by default + this(0); + } + + public VariableLengthBytesWritableRowKey(int fixedPrefixLength) { + if (fixedPrefixLength < 0) + throw new IllegalArgumentException("fixed prefix length can not be < 0"); + + this.fixedPrefixLength = fixedPrefixLength; + } + + @Override + public Class getSerializedClass() { + return BytesWritable.class; + } + + @Override + public int getSerializedLength(Object o) throws IOException { + if (o == null) return terminate() ? fixedPrefixLength + 1 : fixedPrefixLength; + + final BytesWritable input = (BytesWritable) o; + return fixedPrefixLength + + getSerializedLength(toStringRepresentation(input.getBytes(), fixedPrefixLength, + input.getLength() - fixedPrefixLength)); + } + + /** + * @return the length of a String with digits if serialized in our + * customized BCD format. We require 1 byte for every 2 characters, + * rounding up. Furthermore, if the number of characters is even, + * we require an additional byte for the terminator nibble if + * terminate() is true. + */ + private int getSerializedLength(String s) { + if (terminate()) + return (s.length() + 2) / 2; + else + return s.length() == 0 ? 
1 : (s.length() + 1) / 2; + } + + @Override + public void serialize(Object o, ImmutableBytesWritable bytesWritable) + throws IOException { + byte[] bytesToWriteIn = bytesWritable.get(); + int offset = bytesWritable.getOffset(); + + if (o == null) { + if (fixedPrefixLength > 0) + throw new IllegalStateException("excepted at least " + + fixedPrefixLength + " bytes to write"); + else if (terminate()) { + // write one (masked) null byte + bytesToWriteIn[offset] = mask(NULL); + RowKeyUtils.seek(bytesWritable, 1); + } + } else { + final BytesWritable input = (BytesWritable) o; + if (fixedPrefixLength > input.getLength()) + throw new IllegalStateException( + "excepted at least " + fixedPrefixLength + " bytes to write"); + else { + encodeFixedPrefix(input.getBytes(), bytesWritable); + encodedCustomizedReversedPackedBcd( + toStringRepresentation(input.getBytes(), fixedPrefixLength, input.getLength() + - fixedPrefixLength), bytesWritable); + } + } + } + + private void encodeFixedPrefix(byte[] input, ImmutableBytesWritable bytesWritable) { + final byte[] output = bytesWritable.get(); + final int offset = bytesWritable.getOffset(); + for (int i = 0; i < fixedPrefixLength; i++) { + output[offset + i] = mask(input[i]); + } + + RowKeyUtils.seek(bytesWritable, fixedPrefixLength); + } + + private String toStringRepresentation(byte[] bytes, int offset, int length) { + final StringBuilder result = new StringBuilder(); + for (int i = 0; i < length; i++) { + byte aByte = bytes[offset + i]; + // An unsigned byte results in max 3 decimal digits (because max + // value is "255") and we want to use always the max size such that + // if two byte arrays have the same length, the two encoded byte + // arrays also have the same length (such that byte sort order is + // remained). Therefore, we fill the gaps with a filler character. 
+ result.append(prependZeroes(3, "" + UnsignedBytes.toInt(aByte))); + } + + return result.toString(); + } + + /** + * Prepend a string with zeroes up to the given total length. Note that we + * could have used String.format("%03d", ...) here but this turned out to + * be a performance killer! + * + * @param totalLength length of the returned string + * @param string string to prepend with zeroes + * @return zero prepended string + */ + private String prependZeroes(int totalLength, String string) { + if (string.length() >= totalLength) { + // no need to prepend anything + return string; + } else { + // prepend with zeroes up to requested total length + final StringBuilder zeroes = new StringBuilder(totalLength - string.length()); + for (int i = 0; i < totalLength - string.length(); i++) { + zeroes.append("0"); + } + + return zeroes.toString() + string; + } + } + + private byte[] fromStringRepresentation(String string) { + // each 3 digits correspond to an encoded byte + final byte[] result = new byte[string.length() / 3]; + + // process the string per 3 digits + final char[] digits = string.toCharArray(); + for (int i = 0; i < result.length; i++) { + int digitIdx = i * 3; + StringBuilder singleByteBcdString = new StringBuilder(); + for (int j = 0; j < 3; j++) { + singleByteBcdString.append( + digits[digitIdx + j] == FILLER ? 
"" : digits[digitIdx + j]); + } + + result[i] = (byte) Integer.parseInt(singleByteBcdString.toString()); + } + + return result; + } + + @Override + public void skip(ImmutableBytesWritable bytesWritable) throws IOException { + if (bytesWritable.getLength() <= 0) + return; + + byte[] bytes = bytesWritable.get(); + int offset = bytesWritable.getOffset(); + int len = bytesWritable.getLength(); + + RowKeyUtils.seek(bytesWritable, + fixedPrefixLength + + getBcdEncodedLength(bytes, offset + fixedPrefixLength, len - fixedPrefixLength)); + } + + protected int getBcdEncodedLength(byte[] bytes, int offset, int len) { + + int i = 0; + while (i < len) { + byte c = mask(bytes[offset + i++]); + if ((c & 0x0f) == TERMINATOR_NIBBLE) break; + } + + return i; + } + + @Override + public Object deserialize(ImmutableBytesWritable bytesWritable) throws IOException { + final int length = bytesWritable.getLength(); + + if (length <= 0 && fixedPrefixLength == 0) + return null; + + final int offset = bytesWritable.getOffset(); + final int variableLengthSuffixOffset = offset + fixedPrefixLength; + final int variableLengthSuffixLength = length - fixedPrefixLength; + + final byte[] fixedLengthPrefix = decodeFixedPrefix(bytesWritable); + + final byte[] variableLengthSuffix = fromStringRepresentation(decodeCustomizedReversedPackedBcd( + bytesWritable, variableLengthSuffixOffset, variableLengthSuffixLength)); + + return new BytesWritable(merge(fixedLengthPrefix, variableLengthSuffix)); + } + + private static byte[] merge(byte[] array1, byte[] array2) { + byte[] merged = new byte[array1.length + array2.length]; + System.arraycopy(array1, 0, merged, 0, array1.length); + System.arraycopy(array2, 0, merged, array1.length, array2.length); + return merged; + } + + private byte[] decodeFixedPrefix(ImmutableBytesWritable input) { + final byte[] output = new byte[fixedPrefixLength]; + + final byte[] inputBytes = input.get(); + final int offset = input.getOffset(); + for (int i = 0; i < fixedPrefixLength; 
i++) { + output[i] = mask(inputBytes[offset + i]); + } + + RowKeyUtils.seek(input, fixedPrefixLength); + return output; + } + + private static byte[] CUSTOMIZED_BCD_ENC_LOOKUP = + new byte[]{3, 4, 5, 6, 7, 9, 10, 12, 14, 15}; + + // note that the value -1 means invalid + private static byte[] CUSTOMIZED_BCD_DEC_LOOKUP = + new byte[]{-1, -1, -1, 0, 1, 2, 3, 4, -1, 5, 6, -1, 7, -1, 8, 9}; + + /** + * Encodes a String with digits into a "customized reversed packed binary + * coded decimal" byte array. + */ + void encodedCustomizedReversedPackedBcd(String decimalDigits, + ImmutableBytesWritable bytesWritable) { + byte[] bytes = bytesWritable.get(); + int offset = bytesWritable.getOffset(); + final int encodedLength = getSerializedLength(decimalDigits); + final char[] digits = decimalDigits.toCharArray(); + + for (int i = 0; i < encodedLength; i++) { + // initialize with termination nibbles + byte bcd = TWO_TERMINATOR_NIBBLES; + int digitsIdx = 2 * i; + boolean firstNibbleWritten = false; + if (digitsIdx < digits.length) { + bcd = (byte) (lookupDigit(digits[digitsIdx]) << 4); + firstNibbleWritten = true; + } + if (++digitsIdx < digits.length) { + bcd |= lookupDigit(digits[digitsIdx]); + } else if (firstNibbleWritten) { + // uneven number of digits -> write terminator nibble + bcd |= TERMINATOR_NIBBLE; + } + // this could be two bcd nibbles or two terminator nibbles + bytes[offset + i] = mask(bcd); + } + + RowKeyUtils.seek(bytesWritable, encodedLength); + } + + private byte lookupDigit(char digit) { + if (digit != FILLER) + return CUSTOMIZED_BCD_ENC_LOOKUP[Character.digit(digit, 10)]; + else return FILLER_NIBBLE; + } + + /** + * Decodes a "customized reversed packed binary coded decimal" byte array + * into a String with digits. 
+ * + * @param bytesWritable the customized packed BCD encoded byte array + * @return The decoded value + */ + String decodeCustomizedReversedPackedBcd(ImmutableBytesWritable bytesWritable, + int offset, int length) { + int i = 0; + final byte[] bytes = bytesWritable.get(); + + StringBuilder sb = new StringBuilder(); + while (i < length) { + byte c = mask(bytes[offset + i++]); + if (addDigit((byte) ((c >>> 4) & 0x0f), sb) || addDigit((byte) (c & 0x0f), sb)) break; + } + + RowKeyUtils.seek(bytesWritable, i); + return sb.toString(); + } + + /** + * Decodes a Binary Coded Decimal digit and adds it to a string. Returns + * true (and leaves string unmodified) if digit is the terminator nibble. + * Returns false otherwise. + */ + protected boolean addDigit(byte bcd, StringBuilder sb) { + if (bcd == TERMINATOR_NIBBLE) { + return true; + } else { + if (bcd != FILLER_NIBBLE) sb.append(CUSTOMIZED_BCD_DEC_LOOKUP[bcd]); + else sb.append(FILLER); + return false; + } + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/package-info.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/package-info.java new file mode 100644 index 0000000..567a1ac --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/package-info.java @@ -0,0 +1,135 @@ +/** + * This package serializes a wide range of simple and complex key data types + * into a sort-order preserving byte encoding. Sorting the serialized byte + * arrays produces the same ordering as the natural sort order of the + * underlying data type. The package can be used to generate byte-valued + * serialized keys for sorted key-value data stores such as HBase. + * + * 0. Design + * + * The goal of this project is to produce extremely space efficient byte + * serializations for common data types while ensuring that the resulting byte + * array sorts correctly. 
As a consequence, we provide types optimized for many + * different situations (32-bit variable length unsigned integers, 64-bit fixed + * signed integers, etc) so that you do not pay for features that you do not + * use. + * + * In contrast to more ad-hoc sortable byte serialization designs, we support + * all values for included types. For example, our double precision floating + * point encoding supports NaNs, positive/negative zero, subnormals, etc, while + * our String encoding supports NULLs, the empty string, etc. This is done + * without compromising space efficiency, usually by taking advantage of the + * underlying encoding format (i.e. IEEE-745 for doubles, UTF-8 for Strings). + * Each RowKey class has a JavaDoc precisely describing its serialization + * format. + * + * RowKeys may be a primitive (single-value) type or complex (composite) type + * composed of many values. Complex types are themselves composed of other + * complex types and primitive types. + * + * 1. Supported Primitive Types + * We support a wide range of primitive (single-value) types: + * + * (i) Variable-Length Integers (Int, IntWritable, Long, LongWritable) + * Variable-length 32-bit integer, and 64-bit longs encoded in a sort order + * preserving variable length format similar in design to Zig-Zag encoding. + * Both signed and unsigned integer types are supported. + * + * Small absolute values such as -1 or 17 take up 1 byte, larger absolute + * values such as 65536 or 2^28 require more bytes. The maximum length of + * a variable-length integer is 5 bytes, and a variable-length long is 9 bytes. + * NULL values are supported without decreasing the range of supported integers + * or negatively impacting the space efficiency of the encoding. + * + * (ii) Fixed-Width Integers (Int, IntWritable, Long, LongWritable) + * Fixed-length 32-bit integers and 64-bit longs are serialized directly to the + * byte array in big endian order. Both signed and unsigned types are supported. 
+ * This is the only row key type that does not support null values. Useful only
+ * when common values are very large (>2^28 for integers, >2^59 for longs),
+ * otherwise variable-length integers are much more space efficient.
+ *
+ * (iii) Floating Point (Float, FloatWritable, Double, DoubleWritable)
+ * Fixed-length 32-bit float and 64-bit double floating point numbers. Null
+ * values are supported at no additional space cost by reserving a NaN
+ * value unused by Java (it is stripped out by NaN canonicalization during
+ * Double.doubleToLongBits). Correctly sorts all values, including subnormals,
+ * infinity, positive and negative zero, etc.
+ *
+ * (iv) BigDecimal
+ * Variable-length bigdecimal format. Scale is encoded as a variable length
+ * integer, and the significand is encoded as a variable-length binary coded
+ * decimal string. Supports all BigDecimal values, as well as NULL.
+ *
+ * (v) String types (Text, UTF-8 byte array, String)
+ * Variable-length format for storing UTF-8 strings. Correctly handles sorting
+ * all valid UTF-8 strings, as well as empty string and NULL values. NULLs and
+ * string terminators are encoded when necessary by leveraging invalid
+ * UTF-8 header bytes, although in many cases they can be omitted from the
+ * serialization entirely (see Section 3).
+ *
+ * 2. Complex (Composite) Types
+ * Currently, the supported complex type is a struct (record) key, which is
+ * used to create a composite key. The struct key is composed of a fixed number
+ * of field row keys (which may be any valid row key type, including another
+ * struct).
+ *
+ * For example, let us suppose the user wants a key composed of a timestamp,
+ * username, and spam score. The timestamps should be sorted in descending
+ * order (so that it is easy to always retrieve the most recent score from the
+ * database).
For this representation, we could create a struct with three
+ * primitive field row keys: a LongWritable with descending sort order, a UTF-8
+ * string, and a float.
+ *
+ * For convenience, a StructBuilder class is provided to build struct keys more
+ * easily, and a StructIterator class is provided to iterate over the fields of
+ * a serialized struct.
+ *
+ * 3. Important Row Key Methods
+ * All row key types are subclasses of type RowKey, and support the following
+ * methods:
+ * (i) get serialized length - Given an object to be serialized, returns the
+ * length of the object's serialized representation (so you can allocate
+ * storage space)
+ * (ii) serialize (writing a type to an immutablebyteswritable or byte array)
+ * (iii) deserialize (reading a type from immutablebyteswritable or byte array)
+ * (iv) skip (skipping over a serialized type in an immutablebyteswritable
+ * without deserializing the object)
+ *
+ * 4. Usage Guidelines
+ * (1) Prefer Writable or byte types (IntWritable, UTF8, Text) to immutable
+ * object (Integer, Long, String) types. The latter cannot be re-used across
+ * multiple serialization/deserialization operations. If you have a MapReduce
+ * job reading/writing millions or billions of keys, you'll want to use
+ * non-immutable types to reduce the number of object instantiations. For more
+ * information, each RowKey class JavaDoc has a usage section describing its
+ * performance characteristics for object instantiation and byte array copying.
+ *
+ * (2) Use the most precise format you require...but no more precise
+ * For variable-length integers, you will gain slightly more efficient storage
+ * by using unsigned integer types instead of signed types (if your integers
+ * are unsigned), and using 32-bit integer types instead of 64-bit longs.
+ *
+ * However, do not use a 32-bit variable-length integer type unless you are
+ * certain that all your values, for the lifetime of your application, will
+ * fit in 32-bits.
The variable-length long encoding is very efficient, and
+ * when compared to the 32-bit integer encoding the additional cost
+ * is modest (a single bit of overhead for 3/4/5-byte integer
+ * encodings, and no overhead for 1 and 2 byte encodings). When in doubt about
+ * the range of values you will store, use a signed or unsigned long type.
+ * Signed types have 1 additional bit of overhead in comparison to unsigned
+ * types (for all integer encoding lengths).
+ *
+ * (3) Ascending sort results in slightly smaller encodings for strings,
+ * bigdecimals, and any NULL value encoding when mustTerminate is false. This
+ * is because when performing ascending sort, we can omit trailing
+ * end-of-string or null value bytes and just use the end of the byte array as
+ * an implicit terminator as described in the RowKey class's JavaDoc. If you
+ * don't have a strong preference on your sort order, ascending sort (in the
+ * above situations) results in slightly more efficient encodings. The worst
+ * case difference in serialization overhead between ascending and descending
+ * order is a single byte per serialized RowKey.
+ *
+ * 5. Additional Documentation
+ * A set of example classes using the RowKey APIs are provided in src/examples.
+ * These classes demonstrate correct API usage, and are a good starting point.
+ */
+package org.apache.hadoop.hbase.util.orderly;
\ No newline at end of file
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKeyTestCase.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKeyTestCase.java
new file mode 100644
index 0000000..7ca09f2
--- /dev/null
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKeyTestCase.java
@@ -0,0 +1,112 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.Writable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public abstract class AbstractVarIntRowKeyTestCase extends RandomRowKeyTestCase +{ + protected int reservedBits, reservedValue; + protected AbstractVarIntRowKey vi; + + public abstract AbstractVarIntRowKey createVarIntRowKey(); + + @Override + public RowKey createRowKey() { + vi = createVarIntRowKey(); + reservedBits = r.nextInt(vi.getMaxReservedBits()); + reservedValue = r.nextInt(1 << reservedBits); + return vi.setReservedBits(reservedBits) + .setReservedValue(reservedValue); + } + + protected void verifyReserved(ImmutableBytesWritable w) { + byte[] b = w.get(); + int offset = w.getOffset(); + + int reservedActual = (b[offset] & 0xff) >>> Byte.SIZE - reservedBits; + assertEquals("Reserved bits corrupt", reservedValue, reservedActual); + } + + @Override + public void testSerialization(Object o, ImmutableBytesWritable w) + throws IOException + { + super.testSerialization(o, w); + if (o != null || vi.terminate()) + 
verifyReserved(w); + } + + @Override + public void testSkip(Object o, ImmutableBytesWritable w) + throws IOException + { + super.testSkip(o, w); + if (o != null || vi.terminate()) + verifyReserved(w); + } + + @Override + public void testSort(Object o1, ImmutableBytesWritable w1, Object o2, + ImmutableBytesWritable w2) throws IOException + { + super.testSort(o1, w1, o2, w2); + if (o1 != null || vi.terminate()) + verifyReserved(w1); + if (o2 != null || vi.terminate()) + verifyReserved(w2); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + long x = vi.getWritable((Writable)o1), + y = vi.getWritable((Writable)o2); + + /* Sign bits may be implicit (not present in x/y) or explicit (stored + * in the most significant bit of x/y using two's complement encoding). We + * compare the sign bits first, and then only compare x and y if the sign + * bits are equal. + */ + if (vi.getSign(x) != vi.getSign(y)) + return (vi.getSign(x) == 0 ? 1 : 0) - (vi.getSign(y) == 0 ? 1 : 0); + + /* The Java long comparison operator interprets the most significant bit of + * x/y as the sign bit, but in implicit signed representations the most + * significant bit is data. We flip the most significant bit of x/y with an + * XOR so that data will compare correctly when using Java long comparison. + * This bit flip has no effect on the comparison if x and y use an explicit + * sign bit because the sign bits are equal, and inverting two equal values + * does not affect the comparison result. + */ + x ^= Long.MIN_VALUE; + y ^= Long.MIN_VALUE; + return (x > y ? 1 : 0) - (y > x ? 
1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/RandomRowKeyTestCase.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/RandomRowKeyTestCase.java new file mode 100644 index 0000000..cf514cf --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/RandomRowKeyTestCase.java @@ -0,0 +1,239 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public abstract class RandomRowKeyTestCase extends RowKeyTestCase +{ + protected Random r; + protected int numTests, maxRedZone; + + @Before + @Override + public void setUp() { + if (r == null) + r = new Random(Long.valueOf(System.getProperty("test.random.seed", "0"))); + numTests = Integer.valueOf(System.getProperty("test.random.count", "8192")); + maxRedZone = Integer.valueOf(System.getProperty("test.random.maxredzone", + "16")); + super.setUp(); + } + + public RandomRowKeyTestCase setRandom(Random r) { + this.r = r; + return this; + } + + public RandomRowKeyTestCase setNumTests(int numTests) { + this.numTests = numTests; + return this; + } + + public RandomRowKeyTestCase setMaxRedZone(int maxRedZone) { + this.maxRedZone = maxRedZone; + return this; + } + + @Override + public ImmutableBytesWritable allocateBuffer(Object o) throws IOException { + return new RedZoneImmutableBytesWritable(key.getSerializedLength(o), + key.getTermination() == Termination.MUST); + } + + @Override + public void serialize(Object o, ImmutableBytesWritable w) throws IOException + { + byte[] b; + int len; + + switch(r.nextInt(4)) { + case 0: /* serialize(Object, ImmutableBytesWritable) */ + key.serialize(o, w); + break; + + case 1: /* serialize(Object) */ + b = key.serialize(o); + System.arraycopy(b, 0, w.get(), w.getOffset(), b.length); + RowKeyUtils.seek(w, b.length); + break; + + case 2: /* serialize(Object, byte[]) */ + len = key.getSerializedLength(o); + b = new byte[len]; + key.serialize(o, b); + System.arraycopy(b, 0, w.get(), w.getOffset(), len); + 
RowKeyUtils.seek(w, len); + break; + + default: /* serialize(Object, byte[], offset) */ + key.serialize(o, w.get(), w.getOffset()); + len = key.getSerializedLength(o); + RowKeyUtils.seek(w, len); + break; + } + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException + { + Object o; + + switch(r.nextInt(3)) { + case 0: /* deserialize(ImmutableBytesWritable) */ + o = key.deserialize(w); + break; + + case 1: /* deserialize(byte[] b) */ + o = key.deserialize(Arrays.copyOfRange(w.get(), w.getOffset(), + w.getOffset() + w.getLength())); + key.skip(w); + break; + + default: /* deserialize(byte[] b, int offset) */ + o = key.deserialize(Arrays.copyOfRange(w.get(), 0, w.getOffset() + + w.getLength()), w.getOffset()); + key.skip(w); + break; + } + + return o; + } + + @Override + public void testSerialization(Object o, ImmutableBytesWritable w) + throws IOException + { + super.testSerialization(o, w); + ((RedZoneImmutableBytesWritable)w).verify(); + } + + @Override + public void testSkip(Object o, ImmutableBytesWritable w) + throws IOException + { + super.testSkip(o, w); + ((RedZoneImmutableBytesWritable)w).verify(); + } + + @Override + public void testSort(Object o1, ImmutableBytesWritable w1, Object o2, + ImmutableBytesWritable w2) throws IOException + { + RedZoneImmutableBytesWritable r1 = (RedZoneImmutableBytesWritable) w1, + r2 = (RedZoneImmutableBytesWritable) w2; + int r1Length = r1.getLength(), + r2Length = r2.getLength(); + r1.set(r1.get(), r1.getOffset(), r1.getBufferLength()); + r2.set(r2.get(), r2.getOffset(), r2.getBufferLength()); + + super.testSort(o1, r1, o2, r2); + + r1.set(r1.get(), r1.getOffset(), r1Length); + r1.verify(); + r2.set(r2.get(), r2.getOffset(), r2Length); + r2.verify(); + } + + @Test + @Override + public void testRowKey() throws IOException { + for (int i = 0; i < numTests; i++) { + setRowKey(createRowKey().setOrder(r.nextBoolean() ? Order.ASCENDING : + Order.DESCENDING).setTermination(r.nextBoolean() ? 
Termination.MUST : Termination.AUTO)); + super.testRowKey(); + if (i != numTests - 1) { + tearDown(); + setUp(); + } + } + } + + protected class RedZoneImmutableBytesWritable + extends ImmutableBytesWritable + { + byte[] header, trailer; + int buflen; + + public RedZoneImmutableBytesWritable() { } + + public RedZoneImmutableBytesWritable(int len, boolean includeTrailer) { + reset(len, includeTrailer); + } + + private void randomize(byte[] b, int offset, int len) { + for (int i = offset; i < len; i++) + b[i] = (byte) r.nextInt(); + } + + private void randomize(byte[] b) { + randomize(b, 0, b.length); + } + + public RedZoneImmutableBytesWritable reset(int len, boolean includeTrailer) + { + this.buflen = len; + if (maxRedZone > 0) { + header = new byte[r.nextInt(maxRedZone)]; + trailer = new byte[r.nextInt(maxRedZone)]; + randomize(header); + randomize(trailer); + } else { + header = RowKeyUtils.EMPTY; + trailer = RowKeyUtils.EMPTY; + } + + byte[] b = new byte[header.length + len + trailer.length]; + System.arraycopy(header, 0, b, 0, header.length); + System.arraycopy(trailer, 0, b, header.length + len, trailer.length); + randomize(b, header.length, len); + + if (includeTrailer && trailer.length > 0) + len += r.nextInt(trailer.length); + set(b, header.length, len); + return this; + } + + public int getBufferLength() { return buflen; } + + private void verifyEquals(byte[] a, int aOffset, byte[] b, + int bOffset, int len) + { + for (int i = 0; i < len; i++) + assertEquals("Header/Trailer corruption", a[aOffset + i], + b[bOffset + i]); + } + + public void verify() { + verifyEquals(header, 0, get(), 0, header.length); + verifyEquals(trailer, 0, get(), header.length + buflen, trailer.length); + } + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/RowKeyTestCase.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/RowKeyTestCase.java new file mode 100644 index 0000000..74bb671 --- /dev/null +++ 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/RowKeyTestCase.java @@ -0,0 +1,134 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public abstract class RowKeyTestCase +{ + protected RowKey key; + + protected abstract RowKey createRowKey(); + + public abstract Object createObject(); + + public abstract int compareTo(Object o1, Object o2); + + public RowKeyTestCase setRowKey(RowKey key) { this.key = key; return this; } + + public RowKeyTestCase setRowKey() { return setRowKey(createRowKey()); } + + public RowKey getRowKey() { return key; } + + @Before + public void setUp() { } + + @After + public void tearDown() { key = null; } + + public void serialize(Object o, ImmutableBytesWritable w) + throws IOException + { + key.serialize(o, w); + } + + public Object 
deserialize(ImmutableBytesWritable w) throws IOException { + return key.deserialize(w); + } + + public ImmutableBytesWritable allocateBuffer(Object o) + throws IOException + { + return new ImmutableBytesWritable(new byte[key.getSerializedLength(o)]); + } + + public void assertBoundsEquals(ImmutableBytesWritable w, int offset, + int len) + { + assertEquals("Offset corrupt", offset, w.getOffset()); + assertEquals("Length corrupt", len, w.getLength()); + } + + public void testSerialization(Object o, ImmutableBytesWritable w) + throws IOException + { + int origOffset = w.getOffset(), + origLength = w.getLength(), + expectedLength = key.getSerializedLength(o); + + serialize(o, w); + assertBoundsEquals(w, origOffset + expectedLength, + origLength - expectedLength); + + w.set(w.get(), origOffset, origLength); + Object p = deserialize(w); + + assertEquals("Data corrupt", 0, compareTo(o, p)); + assertBoundsEquals(w, origOffset + expectedLength, + origLength - expectedLength); + w.set(w.get(), origOffset, origLength); + } + + public void testSkip(Object o, ImmutableBytesWritable w) + throws IOException + { + int origOffset = w.getOffset(), + origLength = w.getLength(), + expectedLength = key.getSerializedLength(o); + key.skip(w); + assertBoundsEquals(w, origOffset + expectedLength, + origLength - expectedLength); + w.set(w.get(), origOffset, origLength); + } + + public void testSort(Object o1, ImmutableBytesWritable w1, Object o2, + ImmutableBytesWritable w2) throws IOException + { + int expectedOrder = compareTo(o1, o2), + byteOrder = Integer.signum(Bytes.compareTo(w1.get(), w1.getOffset(), + w1.getLength(), w2.get(), w2.getOffset(), w2.getLength())); + if (key.getOrder() == Order.DESCENDING) + expectedOrder = -expectedOrder; + assertEquals("Invalid sort order", expectedOrder, byteOrder); + } + + @Test + public void testRowKey() throws IOException { + Object o1 = createObject(), + o2 = createObject(); + ImmutableBytesWritable w1 = allocateBuffer(o1), + w2 = 
allocateBuffer(o2); + testSerialization(o1, w1); + testSerialization(o2, w2); + testSkip(o1, w1); + testSkip(o2, w2); + testSort(o1, w1, o2, w2); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestBigDecimalRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestBigDecimalRowKey.java new file mode 100644 index 0000000..4af2873 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestBigDecimalRowKey.java @@ -0,0 +1,98 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Before; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestBigDecimalRowKey extends RandomRowKeyTestCase +{ + protected int maxBits; + + @Before + @Override + public void setUp() { + super.setUp(); + maxBits = Integer.valueOf(System.getProperty("test.random.maxbits", + "1024")); + } + + @Override + public RowKey createRowKey() { return new BigDecimalRowKey(); } + + private BigInteger randBigInteger() { + int bits = r.nextInt(maxBits); + switch (r.nextInt(128)) { + case 0: + bits &= 63; + case 1: + bits &= 65535; + case 2: + bits &= ((1 << 21) - 1); + } + + BigInteger i = new BigInteger(bits, r); + if (r.nextBoolean()) + i = i.negate(); + return i; + } + + private int randScale(int unscaledBits) { + int scale = r.nextInt(Integer.MAX_VALUE - unscaledBits); + if (r.nextBoolean()) scale = -scale; + + switch (r.nextInt(128)) { + case 0: + scale = (scale & 127) - 64; + break; + + case 1: + scale = (scale & 16383) - 8192; + break; + + case 2: + scale = (scale & ((1 << 21) - 1)) - (1 << 20); + break; + } + + return scale; + } + + @Override + public Object createObject() { + if (r.nextInt(128) == 0) + return null; + + BigInteger i = randBigInteger(); + int scale = randScale(i.bitCount()); + return new BigDecimal(i, scale); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 
1 : 0); + return ((BigDecimal)o1).compareTo((BigDecimal)o2); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestDoubleRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestDoubleRowKey.java new file mode 100644 index 0000000..34a0d3a --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestDoubleRowKey.java @@ -0,0 +1,46 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.DoubleWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestDoubleRowKey extends TestDoubleWritableRowKey +{ + @Override + public RowKey createRowKey() { return new DoubleRowKey(); } + + @Override + public Object createObject() { + Object o = super.createObject(); + if (o == null) + return o; + return Double.valueOf(((DoubleWritable)o).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return super.compareTo(o1, o2); + return super.compareTo(new DoubleWritable((Double)o1), + new DoubleWritable((Double)o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestDoubleWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestDoubleWritableRowKey.java new file mode 100644 index 0000000..7c339c1 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestDoubleWritableRowKey.java @@ -0,0 +1,91 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.DoubleWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestDoubleWritableRowKey extends RandomRowKeyTestCase +{ + @Override + public RowKey createRowKey() { return new DoubleWritableRowKey(); } + + @Override + public Object createObject() { + if (r.nextInt(128) == 0) + return null; + + double d; + switch (r.nextInt(128)) { + case 0: + d = +0.0d; + break; + + case 1: + d = -0.0d; + break; + + case 2: + d = Double.POSITIVE_INFINITY; + break; + + case 3: + d = Double.NEGATIVE_INFINITY; + break; + + case 4: + d = Double.NaN; + break; + + default: + d = r.nextDouble(); + break; + } + + return new DoubleWritable(d); + } + + private boolean isPositiveZero(double d) { + return 1/d == Double.POSITIVE_INFINITY; + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + double d = ((DoubleWritable)o1).get(), + e = ((DoubleWritable)o2).get(); + + if (!Double.isNaN(d) && !Double.isNaN(e) && !(d == 0 && e == 0)) + return ((d > e) ? 1 : 0) - ((e > d) ? 1 : 0); + + if (Double.isNaN(d)) { + if (Double.isNaN(e)) + return 0; + return 1; + } else if (Double.isNaN(e)) { + return -1; + } else /* d == +/-0.0 && e == +/-0.0 */ { + return (isPositiveZero(d) ? 1 : 0) - (isPositiveZero(e) ? 1 : 0); + } + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedByteArrayRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedByteArrayRowKey.java new file mode 100644 index 0000000..8a544aa --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedByteArrayRowKey.java @@ -0,0 +1,54 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.BytesWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedByteArrayRowKey extends TestFixedBytesWritableRowKey { + + final int length = 123; + + @Override + public RowKey createRowKey() { + return new FixedByteArrayRowKey(length); + } + + @Override + public Object createObject() { + final byte[] randomBytes = new byte[length]; + r.nextBytes(randomBytes); + return randomBytes; + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + byte[] b1 = ((byte[])o1); + byte[] b2 = ((byte[])o2); + + final int compareTo = new BytesWritable(b1).compareTo(new BytesWritable(b2)); + + return compareTo < 0 ? -1 : compareTo > 0 ? 
1 : 0; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedBytesWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedBytesWritableRowKey.java new file mode 100644 index 0000000..7b477cc --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedBytesWritableRowKey.java @@ -0,0 +1,54 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.BytesWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedBytesWritableRowKey extends RandomRowKeyTestCase { + + final int length = 132; + + @Override + public RowKey createRowKey() { + return new FixedBytesWritableRowKey(length); + } + + @Override + public Object createObject() { + final byte[] randomBytes = new byte[length]; + r.nextBytes(randomBytes); + return new BytesWritable(randomBytes); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 
1 : 0); + + BytesWritable b1 = ((BytesWritable)o1); + BytesWritable b2 = ((BytesWritable)o2); + + final int compareTo = b1.compareTo(b2); + + return compareTo < 0 ? -1 : compareTo > 0 ? 1 : 0; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedIntWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedIntWritableRowKey.java new file mode 100644 index 0000000..6428dd3 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedIntWritableRowKey.java @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.IntWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedIntWritableRowKey extends RandomRowKeyTestCase +{ + @Override + public RowKey createRowKey() { return new FixedIntWritableRowKey(); } + + @Override + public Object createObject() { + return new IntWritable(r.nextInt()); + } + + @Override + public int compareTo(Object o1, Object o2) { + int x = ((IntWritable)o1).get(), + y = ((IntWritable)o2).get(); + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedIntegerRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedIntegerRowKey.java new file mode 100644 index 0000000..fa85873 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedIntegerRowKey.java @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.IntWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedIntegerRowKey extends TestFixedIntWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FixedIntegerRowKey(); } + + @Override + public Object createObject() { + return Integer.valueOf(((IntWritable)super.createObject()).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + int x = (Integer) o1, + y = (Integer) o2; + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedLongRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedLongRowKey.java new file mode 100644 index 0000000..1e24b43 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedLongRowKey.java @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.LongWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedLongRowKey extends TestFixedLongWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FixedLongRowKey(); } + + @Override + public Object createObject() { + return Long.valueOf(((LongWritable)super.createObject()).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + long x = (Long) o1, + y = (Long) o2; + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedLongWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedLongWritableRowKey.java new file mode 100644 index 0000000..531e196 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedLongWritableRowKey.java @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.LongWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedLongWritableRowKey extends RandomRowKeyTestCase +{ + @Override + public RowKey createRowKey() { return new FixedLongWritableRowKey(); } + + @Override + public Object createObject() { + return new LongWritable(r.nextLong()); + } + + @Override + public int compareTo(Object o1, Object o2) { + long x = ((LongWritable)o1).get(), + y = ((LongWritable)o2).get(); + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedIntWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedIntWritableRowKey.java new file mode 100644 index 0000000..e4b3477 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedIntWritableRowKey.java @@ -0,0 +1,39 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.IntWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedUnsignedIntWritableRowKey + extends TestFixedIntWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FixedUnsignedIntWritableRowKey(); } + + @Override + public int compareTo(Object o1, Object o2) { + long x = ((IntWritable)o1).get() ^ Integer.MIN_VALUE, + y = ((IntWritable)o2).get() ^ Integer.MIN_VALUE; + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedIntegerRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedIntegerRowKey.java new file mode 100644 index 0000000..9edec96 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedIntegerRowKey.java @@ -0,0 +1,44 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.IntWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedUnsignedIntegerRowKey + extends TestFixedUnsignedIntWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FixedUnsignedIntegerRowKey(); } + + @Override + public Object createObject() { + return Integer.valueOf(((IntWritable)super.createObject()).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + int x = (Integer) o1 ^ Integer.MIN_VALUE, + y = (Integer) o2 ^ Integer.MIN_VALUE; + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedLongRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedLongRowKey.java new file mode 100644 index 0000000..e67b148 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedLongRowKey.java @@ -0,0 +1,44 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.LongWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedUnsignedLongRowKey + extends TestFixedUnsignedLongWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FixedUnsignedLongRowKey(); } + + @Override + public Object createObject() { + return Long.valueOf(((LongWritable)super.createObject()).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + long x = (Long) o1 ^ Long.MIN_VALUE, + y = (Long) o2 ^ Long.MIN_VALUE; + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedLongWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedLongWritableRowKey.java new file mode 100644 index 0000000..0a01b90 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFixedUnsignedLongWritableRowKey.java @@ -0,0 +1,39 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.LongWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFixedUnsignedLongWritableRowKey + extends TestFixedLongWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FixedUnsignedLongWritableRowKey(); } + + @Override + public int compareTo(Object o1, Object o2) { + long x = ((LongWritable)o1).get() ^ Long.MIN_VALUE, + y = ((LongWritable)o2).get() ^ Long.MIN_VALUE; + + return ((x > y) ? 1 : 0) - ((y > x) ? 1 : 0); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFloatRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFloatRowKey.java new file mode 100644 index 0000000..48c7e55 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFloatRowKey.java @@ -0,0 +1,46 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.FloatWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFloatRowKey extends TestFloatWritableRowKey +{ + @Override + public RowKey createRowKey() { return new FloatRowKey(); } + + @Override + public Object createObject() { + Object o = super.createObject(); + if (o == null) + return o; + return Float.valueOf(((FloatWritable)o).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return super.compareTo(o1, o2); + return super.compareTo(new FloatWritable((Float)o1), + new FloatWritable((Float)o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFloatWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFloatWritableRowKey.java new file mode 100644 index 0000000..dd3d0df --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestFloatWritableRowKey.java @@ -0,0 +1,91 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.FloatWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestFloatWritableRowKey extends RandomRowKeyTestCase +{ + @Override + public RowKey createRowKey() { return new FloatWritableRowKey(); } + + @Override + public Object createObject() { + if (r.nextInt(128) == 0) + return null; + + float f; + switch (r.nextInt(128)) { + case 0: + f = +0.0f; + break; + + case 1: + f = -0.0f; + break; + + case 2: + f = Float.POSITIVE_INFINITY; + break; + + case 3: + f = Float.NEGATIVE_INFINITY; + break; + + case 4: + f = Float.NaN; + break; + + default: + f = r.nextFloat(); + break; + } + + return new FloatWritable(f); + } + + private boolean isPositiveZero(float f) { + return 1/f == Float.POSITIVE_INFINITY; + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + float f = ((FloatWritable)o1).get(), + g = ((FloatWritable)o2).get(); + + if (!Float.isNaN(f) && !Float.isNaN(g) && !(f == 0 && g == 0)) + return ((f > g) ? 1 : 0) - ((g > f) ? 1 : 0); + + if (Float.isNaN(f)) { + if (Float.isNaN(g)) + return 0; + return 1; + } else if (Float.isNaN(g)) { + return -1; + } else /* f == +/-0.0 && g == +/-0.0 */ { + return (isPositiveZero(f) ? 1 : 0) - (isPositiveZero(g) ? 1 : 0); + } + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestIntWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestIntWritableRowKey.java new file mode 100644 index 0000000..a4d8362 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestIntWritableRowKey.java @@ -0,0 +1,57 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.IntWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestIntWritableRowKey extends AbstractVarIntRowKeyTestCase +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new IntWritableRowKey(); + } + + @Override + public Object createObject() { + if (r.nextInt(128) == 0) + return null; + + int i = r.nextInt(); + switch (r.nextInt(4)) { + case 0: /* Single byte: -64 <= x < 64 */ + i = (i & 127) - 64; + break; + + case 1: /* Double byte: -8192 <= x < 8192 */ + i = (i & 16383) - 8192; + break; + + case 2: /* 1-2 MB */ + i = (i & ((1 << 21) - 1)) - (1 << 20); + break; + + /* case 3: do nothing */ + } + + return new IntWritable(i); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestIntegerRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestIntegerRowKey.java new file mode 100644 index 0000000..2ce11b1 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestIntegerRowKey.java @@ -0,0 +1,48 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.IntWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestIntegerRowKey extends TestIntWritableRowKey +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new IntegerRowKey(); + } + + @Override + public Object createObject() { + Object o = super.createObject(); + if (o == null) + return o; + return Integer.valueOf(((IntWritable)o).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return super.compareTo(o1, o2); + return super.compareTo(new IntWritable((Integer)o1), + new IntWritable((Integer)o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLazyBigDecimalRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLazyBigDecimalRowKey.java new file mode 100644 index 0000000..e699103 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLazyBigDecimalRowKey.java @@ -0,0 +1,43 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; +import java.math.BigDecimal; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestLazyBigDecimalRowKey extends TestBigDecimalRowKey +{ + @Override + public RowKey createRowKey() { + return new LazyBigDecimalRowKey() { + @Override + public Class getDeserializedClass() { return BigDecimal.class; } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException { + return getBigDecimal((ImmutableBytesWritable)super.deserialize(w)); + } + }; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLongRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLongRowKey.java new file mode 100644 index 0000000..9245ece --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLongRowKey.java @@ -0,0 +1,48 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.LongWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestLongRowKey extends TestLongWritableRowKey +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new LongRowKey(); + } + + @Override + public Object createObject() { + Object o = super.createObject(); + if (o == null) + return o; + return Long.valueOf(((LongWritable)o).get()); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return super.compareTo(o1, o2); + return super.compareTo(new LongWritable((Long)o1), + new LongWritable((Long)o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLongWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLongWritableRowKey.java new file mode 100644 index 0000000..a9bdc9a --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestLongWritableRowKey.java @@ -0,0 +1,57 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.LongWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestLongWritableRowKey extends AbstractVarIntRowKeyTestCase +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new LongWritableRowKey(); + } + + @Override + public Object createObject() { + if (r.nextInt(128) == 0) + return null; + + long l = r.nextLong(); + switch (r.nextInt(4)) { + case 0: /* Single byte: -64 <= x < 64 */ + l = (l & 127) - 64; + break; + + case 1: /* Double byte: -8192 <= x < 8192 */ + l = (l & 16383) - 8192; + break; + + case 2: /* 1-2 MB */ + l = (l & ((1 << 21) - 1)) - (1 << 20); + break; + + /* case 3: do nothing */ + } + + return new LongWritable(l); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestStringRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestStringRowKey.java new file mode 100644 index 0000000..7d0cbcd --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestStringRowKey.java @@ -0,0 +1,46 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestStringRowKey extends TestUTF8RowKey +{ + @Override + public RowKey createRowKey() { return new StringRowKey(); } + + @Override + public Object createObject() { + Object o = super.createObject(); + if (o == null) + return o; + return Bytes.toString((byte[])o); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return super.compareTo(o1, o2); + return super.compareTo(Bytes.toBytes((String)o1), + Bytes.toBytes((String)o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestStructRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestStructRowKey.java new file mode 100644 index 0000000..04e87f8 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestStructRowKey.java @@ -0,0 +1,182 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.junit.After; +import org.junit.Before; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestStructRowKey extends RandomRowKeyTestCase +{ + protected static final List> + TESTS_PRIMITIVE, TESTS_ALL; + + static { + List> prim = + new ArrayList>(); + prim.add(TestBigDecimalRowKey.class); + prim.add(TestDoubleRowKey.class); + prim.add(TestDoubleWritableRowKey.class); + prim.add(TestFixedIntegerRowKey.class); + prim.add(TestFixedIntWritableRowKey.class); + prim.add(TestFixedLongRowKey.class); + prim.add(TestFixedLongWritableRowKey.class); + prim.add(TestFixedUnsignedIntegerRowKey.class); + prim.add(TestFixedUnsignedIntWritableRowKey.class); + prim.add(TestFixedUnsignedLongRowKey.class); + prim.add(TestFixedUnsignedLongWritableRowKey.class); + prim.add(TestFloatRowKey.class); + prim.add(TestFloatWritableRowKey.class); + prim.add(TestIntegerRowKey.class); + prim.add(TestIntWritableRowKey.class); + prim.add(TestLazyBigDecimalRowKey.class); + prim.add(TestLongRowKey.class); + 
prim.add(TestLongWritableRowKey.class); + prim.add(TestStringRowKey.class); + prim.add(TestTextRowKey.class); + prim.add(TestUnsignedIntegerRowKey.class); + prim.add(TestUnsignedIntWritableRowKey.class); + prim.add(TestUnsignedLongRowKey.class); + prim.add(TestUnsignedLongWritableRowKey.class); + prim.add(TestUTF8RowKey.class); + TESTS_PRIMITIVE = prim; + + List> all = + new ArrayList>(prim); + all.add(TestStructRowKey.class); + TESTS_ALL = all; + } + + protected int maxFields, maxNest; + protected RandomRowKeyTestCase[] fieldTests; + + public TestStructRowKey() { maxNest = -1; } + + public TestStructRowKey setMaxNest(int maxNest) { + this.maxNest = maxNest; + return this; + } + + protected RandomRowKeyTestCase randField() { + List> cList = + maxNest > 0 ? TESTS_ALL : TESTS_PRIMITIVE; + + RandomRowKeyTestCase t; + try { + t = cList.get(r.nextInt(cList.size())).newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + if (t instanceof TestStructRowKey) + ((TestStructRowKey)t).setMaxNest(maxNest - 1); + t.setRandom(r); + t.setUp(); + return t; + } + + @Before + @Override + public void setUp() { + super.setUp(); + maxFields = Integer.valueOf(System.getProperty("test.random.maxfieldcount", + "16")); + if (maxNest < 0) + maxNest = Integer.valueOf(System.getProperty("test.random.maxfieldnest", + "4")); + + fieldTests = new RandomRowKeyTestCase[r.nextInt(maxFields)]; + for (int i = 0; i < fieldTests.length; i++) + fieldTests[i] = randField(); + } + + @After + @Override + public void tearDown() { + for (int i = 0; i < fieldTests.length; i++) { + fieldTests[i].tearDown(); + fieldTests[i] = null; + } + + super.tearDown(); + } + + @Override + public RowKey createRowKey() { + RowKey[] fields = new RowKey[fieldTests.length]; + for (int i = 0; i < fields.length; i++) { + fields[i] = fieldTests[i].createRowKey(); + fields[i].setOrder(r.nextBoolean() ? 
Order.ASCENDING : Order.DESCENDING); + fieldTests[i].setRowKey(fields[i]); + } + return new StructRowKey(fields); + } + + @Override + public Object createObject() { + Object[] o = new Object[fieldTests.length]; + for (int i = 0; i < o.length; i++) + o[i] = fieldTests[i].createObject(); + return o; + } + + @Override + public Object deserialize(ImmutableBytesWritable w) throws IOException + { + if (r.nextInt(64) != 0) + return super.deserialize(w); + + Object[] o = new Object[fieldTests.length]; + StructIterator iter = ((StructRowKey)key).iterateOver(w).iterator(); + + int pos = 0; + while (iter.hasNext()) o[pos++] = iter.next(); + return o; + } + + @Override + public int compareTo(Object o1, Object o2) { + Object[] f1 = (Object[]) o1, + f2 = (Object[]) o2; + + if (f1.length != f2.length) + throw new IndexOutOfBoundsException("Comparing fields with length " + + f1.length + " to fields with length " + f2.length); + + for (int i = 0; i < f1.length; i++) { + int r = fieldTests[i].compareTo(f1[i], f2[i]); + if (r != 0) { + if (fieldTests[i].getRowKey().getOrder() == Order.DESCENDING) + r = -r; + if (getRowKey().getOrder() == Order.DESCENDING) + r = -r; + return r; + } + } + + return 0; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestTextRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestTextRowKey.java new file mode 100644 index 0000000..1b4ba3b --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestTextRowKey.java @@ -0,0 +1,46 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.Text; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestTextRowKey extends TestUTF8RowKey +{ + @Override + public RowKey createRowKey() { return new TextRowKey(); } + + @Override + public Object createObject() { + Object o = super.createObject(); + if (o == null) + return o; + return new Text((byte[])o); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return super.compareTo(o1, o2); + return super.compareTo(RowKeyUtils.toBytes((Text)o1), + RowKeyUtils.toBytes((Text)o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUTF8RowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUTF8RowKey.java new file mode 100644 index 0000000..12d8e9e --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUTF8RowKey.java @@ -0,0 +1,61 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Before; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestUTF8RowKey extends RandomRowKeyTestCase +{ + protected int maxLength; + + @Before + @Override + public void setUp() { + super.setUp(); + maxLength = Integer.valueOf(System.getProperty("test.random.maxstrlength", + "1024")); + } + + @Override + public RowKey createRowKey() { return new UTF8RowKey(); } + + @Override + public Object createObject() { + if (r.nextInt(128) == 0) + return null; + + int len = r.nextInt(maxLength); + StringBuilder sb = new StringBuilder(len); + + for (int i = 0; i < len; i++) + sb.appendCodePoint(r.nextInt(Character.MAX_CODE_POINT + 1)); + return Bytes.toBytes(sb.toString()); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 
1 : 0); + return Integer.signum(Bytes.compareTo((byte[])o1, (byte[])o2)); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedIntWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedIntWritableRowKey.java new file mode 100644 index 0000000..a3f5c98 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedIntWritableRowKey.java @@ -0,0 +1,31 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestUnsignedIntWritableRowKey extends TestIntWritableRowKey +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new UnsignedIntWritableRowKey(); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedIntegerRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedIntegerRowKey.java new file mode 100644 index 0000000..d40fb0b --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedIntegerRowKey.java @@ -0,0 +1,31 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestUnsignedIntegerRowKey extends TestIntegerRowKey +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new UnsignedIntegerRowKey(); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedLongRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedLongRowKey.java new file mode 100644 index 0000000..833a8aa --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedLongRowKey.java @@ -0,0 +1,31 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestUnsignedLongRowKey extends TestLongRowKey +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new UnsignedLongRowKey(); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedLongWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedLongWritableRowKey.java new file mode 100644 index 0000000..966e393 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestUnsignedLongWritableRowKey.java @@ -0,0 +1,31 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestUnsignedLongWritableRowKey extends TestLongWritableRowKey +{ + @Override + public AbstractVarIntRowKey createVarIntRowKey() { + return new UnsignedLongWritableRowKey(); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthByteArrayRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthByteArrayRowKey.java new file mode 100644 index 0000000..eb24a5a --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthByteArrayRowKey.java @@ -0,0 +1,54 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.BytesWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestVariableLengthByteArrayRowKey extends TestVariableLengthBytesWritableRowKey { + + @Override + public RowKey createRowKey() { + return new VariableLengthByteArrayRowKey(); + } + + @Override + public Object createObject() { + final int length = r.nextInt(1000); + final byte[] randomBytes = new byte[length]; + r.nextBytes(randomBytes); + return randomBytes; + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + byte[] b1 = ((byte[])o1); + byte[] b2 = ((byte[])o2); + + final int compareTo = new BytesWritable(b1).compareTo(new BytesWritable(b2)); + + return compareTo < 0 ? -1 : compareTo > 0 ? 1 : 0; + } + +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthBytesWritableRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthBytesWritableRowKey.java new file mode 100644 index 0000000..8513058 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthBytesWritableRowKey.java @@ -0,0 +1,119 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.BytesWritable; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestVariableLengthBytesWritableRowKey extends RandomRowKeyTestCase { + + @Override + public RowKey createRowKey() { + return new VariableLengthBytesWritableRowKey(); + } + + @Override + public Object createObject() { + final int length = r.nextInt(1000); + final byte[] randomBytes = new byte[length]; + r.nextBytes(randomBytes); + return new BytesWritable(randomBytes); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + BytesWritable b1 = ((BytesWritable) o1); + BytesWritable b2 = ((BytesWritable) o2); + + final int compareTo = b1.compareTo(b2); + + return compareTo < 0 ? -1 : compareTo > 0 ? 
1 : 0; + } + + // some specific tests for the customized BCD format + + private byte[] encode(String decimalDigits, int expectedLength) { + final ImmutableBytesWritable result = new ImmutableBytesWritable(new byte[expectedLength]); + + new VariableLengthBytesWritableRowKey().encodedCustomizedReversedPackedBcd(decimalDigits, result); + + return result.get(); + } + + private String decode(byte[] input) { + return new VariableLengthBytesWritableRowKey().decodeCustomizedReversedPackedBcd( + new ImmutableBytesWritable(input), 0, input.length); + } + + @Test + public void testEncode() { + // 0 encodes to 0x3 + terminator nibble 0x01 + assertArrayEquals(new byte[]{0x31}, encode("0", 1)); + // etc... + assertArrayEquals(new byte[]{0x41}, encode("1", 1)); + assertArrayEquals(new byte[]{0x51}, encode("2", 1)); + assertArrayEquals(new byte[]{0x61}, encode("3", 1)); + assertArrayEquals(new byte[]{0x71}, encode("4", 1)); + + assertArrayEquals(new byte[]{0x33}, encode("00", 1)); + assertArrayEquals(new byte[]{0x34}, encode("01", 1)); + assertArrayEquals(new byte[]{0x35}, encode("02", 1)); + assertArrayEquals(new byte[]{0x36}, encode("03", 1)); + assertArrayEquals(new byte[]{0x37}, encode("04", 1)); + assertArrayEquals(new byte[]{0x39}, encode("05", 1)); + assertArrayEquals(new byte[]{0x3A}, encode("06", 1)); + assertArrayEquals(new byte[]{0x3C}, encode("07", 1)); + assertArrayEquals(new byte[]{0x3E}, encode("08", 1)); + assertArrayEquals(new byte[]{0x3F}, encode("09", 1)); + + assertArrayEquals(new byte[]{0x45}, encode("12", 1)); + assertArrayEquals(new byte[]{0x45, 0x61}, encode("123", 2)); + assertArrayEquals(new byte[]{0x45, 0x67}, encode("1234", 2)); + + // a special case (encoded value is 0x9E which is actually a negative byte when interpreted as signed value) + byte expected = (byte) 0x9E; // decimal -98 + assertArrayEquals(new byte[]{expected}, encode("58", 1)); + } + + @Test + public void testDecode() { + assertEquals("0", decode(new byte[]{0x31})); + assertEquals("1", 
decode(new byte[]{0x41})); + assertEquals("2", decode(new byte[]{0x51})); + assertEquals("3", decode(new byte[]{0x61})); + assertEquals("4", decode(new byte[]{0x71})); + + assertEquals("12", decode(new byte[]{0x45})); + assertEquals("123", decode(new byte[]{0x45, 0x61})); + assertEquals("1234", decode(new byte[]{0x45, 0x67})); + + assertEquals("123", decode(new byte[]{0x45, 0x61, 0x00})); // stuff after termination nibble (0x01) is ignored + } + +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthBytesWritableWithFixedLengthPrefixRowKey.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthBytesWritableWithFixedLengthPrefixRowKey.java new file mode 100644 index 0000000..5f8a814 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/orderly/TestVariableLengthBytesWritableWithFixedLengthPrefixRowKey.java @@ -0,0 +1,54 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.io.BytesWritable; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestVariableLengthBytesWritableWithFixedLengthPrefixRowKey extends RandomRowKeyTestCase { + + @Override + public RowKey createRowKey() { + return new VariableLengthBytesWritableRowKey(r.nextInt(10)); + } + + @Override + public Object createObject() { + final int length = r.nextInt(1000) + 10; + final byte[] randomBytes = new byte[length]; + r.nextBytes(randomBytes); + return new BytesWritable(randomBytes); + } + + @Override + public int compareTo(Object o1, Object o2) { + if (o1 == null || o2 == null) + return (o1 != null ? 1 : 0) - (o2 != null ? 1 : 0); + + BytesWritable b1 = ((BytesWritable) o1); + BytesWritable b2 = ((BytesWritable) o2); + + final int compareTo = b1.compareTo(b2); + + return compareTo < 0 ? -1 : compareTo > 0 ? 1 : 0; + } + +} diff --git a/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalExample.java b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalExample.java new file mode 100644 index 0000000..ae55113 --- /dev/null +++ b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalExample.java @@ -0,0 +1,79 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hbase.util.orderly; + +import java.math.BigDecimal; + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; + +public class BigDecimalExample +{ + /* Simple examples showing serialization lengths with BigDecimalRow Key */ + public void lengthExamples() throws Exception { + BigDecimalRowKey i = new BigDecimalRowKey(); + + System.out.println("serialize(null) length - " + i.serialize(null).length); + System.out.println("serialize(58.75) length - " + + i.serialize(new BigDecimal(58.75)).length); + System.out.println("serialize(107-e902) length - " + + i.serialize(new BigDecimal("107e-902")).length); + + i.setOrder(Order.DESCENDING); + System.out.println("descending serialize (null) - length " + + i.serialize(null).length); + System.out.println("descending serialize (57) - length " + + i.serialize(new BigDecimal(57)).length); + } + + /* Simple examples showing serialization tests with BD/LazyBigDecimal */ + public void serializationExamples() throws Exception { + BigDecimalRowKey i = new BigDecimalRowKey(); + LazyBigDecimalRowKey l = new LazyBigDecimalRowKey(); + ImmutableBytesWritable buffer = new ImmutableBytesWritable(); + byte[] b; + BigDecimal bd; + + /* Serialize and deserialize into an immutablebyteswritable */ + bd = new BigDecimal("107e-902"); + b = new byte[i.getSerializedLength(bd)]; + buffer.set(b); + i.serialize(bd, buffer); + buffer.set(b, 0, b.length); + System.out.println("deserialize(serialize(107e-902)) = " + + i.deserialize(buffer)); + + /* Serialize and deserialize into a byte array (descending 
sort) + * using lazybigdecimal + */ + l.setOrder(Order.DESCENDING); + System.out.println("deserialize(serialize(0)) = " + + l.getBigDecimal( + (ImmutableBytesWritable)l.deserialize(l.serialize(BigDecimal.ZERO)))); + + /* Serialize and deserialize NULL into a byte array */ + System.out.println("deserialize(serialize(NULL)) = " + + i.deserialize(i.serialize(null))); + } + + public static void main(String[] args) throws Exception { + BigDecimalExample e = new BigDecimalExample(); + e.lengthExamples(); + e.serializationExamples(); + } +} diff --git a/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleExample.java b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleExample.java new file mode 100644 index 0000000..6585ff2 --- /dev/null +++ b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleExample.java @@ -0,0 +1,77 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.DoubleWritable; + +public class DoubleExample +{ + /* Simple examples showing serialization lengths with Double Row Key */ + public void lengthExamples() throws Exception { + DoubleRowKey d = new DoubleRowKey(); + + System.out.println("serialize(null) length - " + d.serialize(null).length); + System.out.println("serialize(57.190235) length - " + + d.serialize(57.923924).length); + System.out.println("serialize(1000000.999) length - " + + d.serialize(1000000.99).length); + + d.setOrder(Order.DESCENDING); + System.out.println("descending serialize (null) - length " + + d.serialize(null).length); + System.out.println("descending serialize (57) - length " + + d.serialize(57d).length); + } + + /* Simple examples showing serialization tests with DoubleWritable Row Key */ + public void serializationExamples() throws Exception { + DoubleWritableRowKey d = new DoubleWritableRowKey(); + DoubleWritable w = new DoubleWritable(); + ImmutableBytesWritable buffer = new ImmutableBytesWritable(); + byte[] b; + + /* Serialize and deserialize into an immutablebyteswritable */ + w.set(-93214.920352); + b = new byte[d.getSerializedLength(w)]; + buffer.set(b); + d.serialize(w, buffer); + buffer.set(b, 0, b.length); + System.out.println("deserialize(serialize(-93214.920352)) = " + + ((DoubleWritable)d.deserialize(buffer)).get()); + + /* Serialize and deserialize into a byte array (descending sort) + */ + d.setOrder(Order.DESCENDING); + w.set(0); + System.out.println("deserialize(serialize(0)) = " + + ((DoubleWritable)d.deserialize(d.serialize(w))).get()); + + /* Serialize and deserialize NULL into a byte array */ + System.out.println("deserialize(serialize(NULL)) = " + + d.deserialize(d.serialize(null))); + } + + public static void main(String[] args) throws Exception { + DoubleExample e = new DoubleExample(); + e.lengthExamples(); + 
e.serializationExamples(); + } +} diff --git a/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongExample.java b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongExample.java new file mode 100644 index 0000000..bc9f42b --- /dev/null +++ b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongExample.java @@ -0,0 +1,73 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hbase.util.orderly; + + +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.io.LongWritable; + +public class FixedLongExample +{ + /* Simple examples showing serialization lengths with + * FixedUnsignedLongRow Key + */ + public void lengthExamples() throws Exception { + FixedUnsignedLongRowKey i = new FixedUnsignedLongRowKey(); + + System.out.println("serialize(2^63) length - " + + i.serialize(Long.MIN_VALUE).length); + System.out.println("serialize(57) length - " + i.serialize(57l).length); + System.out.println("serialize(293) length - " + i.serialize(293l).length); + + i.setOrder(Order.DESCENDING); + System.out.println("descending serialize (57) - length " + + i.serialize(57l).length); + System.out.println("descending serialize (2^32) - length " + + i.serialize(1l << 32).length); + } + + /* Simple examples showing serialization tests with FixedLongWritable */ + public void serializationExamples() throws Exception { + FixedLongWritableRowKey l = new FixedLongWritableRowKey(); + LongWritable w = new LongWritable(); + ImmutableBytesWritable buffer = new ImmutableBytesWritable(); + byte[] b; + + /* Serialize and deserialize into an immutablebyteswritable */ + w.set(-93214); + b = new byte[l.getSerializedLength(w)]; + buffer.set(b); + l.serialize(w, buffer); + buffer.set(b, 0, b.length); + System.out.println("deserialize(serialize(-93214)) = " + + ((LongWritable)l.deserialize(buffer)).get()); + + /* Serialize and deserialize into a byte array (descending sort). 
*/ + l.setOrder(Order.DESCENDING); + w.set(0); + System.out.println("deserialize(serialize(0)) = " + + ((LongWritable)l.deserialize(l.serialize(w))).get()); + } + + public static void main(String[] args) throws Exception { + FixedLongExample e = new FixedLongExample(); + e.lengthExamples(); + e.serializationExamples(); + } +} diff --git a/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/IntExample.java b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/IntExample.java new file mode 100644 index 0000000..9728611 --- /dev/null +++ b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/IntExample.java @@ -0,0 +1,76 @@ +/* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+public class IntExample
+{
+  /* Simple examples showing serialization lengths with IntegerRowKey */
+  public void lengthExamples() throws Exception {
+    IntegerRowKey i = new IntegerRowKey();
+
+    System.out.println("serialize(null) length - " + i.serialize(null).length);
+    System.out.println("serialize(57) length - " + i.serialize(57).length);
+    System.out.println("serialize(293) length - " + i.serialize(293).length);
+
+    i.setOrder(Order.DESCENDING);
+    System.out.println("descending serialize (null) - length " +
+        i.serialize(null).length);
+    System.out.println("descending serialize (57) - length " +
+        i.serialize(57).length);
+  }
+
+  /* Simple examples showing serialization tests with IntWritableRowKey */
+  public void serializationExamples() throws Exception {
+    IntWritableRowKey i = new IntWritableRowKey();
+    IntWritable w = new IntWritable();
+    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
+    byte[] b;
+
+    /* Serialize and deserialize into an ImmutableBytesWritable */
+    w.set(-93214);
+    b = new byte[i.getSerializedLength(w)];
+    buffer.set(b);
+    i.serialize(w, buffer);
+    /* Reset the buffer to span the full array before reading it back */
+    buffer.set(b, 0, b.length);
+    System.out.println("deserialize(serialize(-93214)) = " +
+        ((IntWritable)i.deserialize(buffer)).get());
+
+    /* Serialize and deserialize into a byte array (descending sort,
+     * with two reserved bits set to 0x3)
+     */
+    i.setReservedBits(2).setReservedValue(0x3).setOrder(Order.DESCENDING);
+    w.set(0);
+    System.out.println("deserialize(serialize(0)) = " +
+        ((IntWritable)i.deserialize(i.serialize(w))).get());
+
+    /* Serialize and deserialize NULL into a byte array */
+    System.out.println("deserialize(serialize(NULL)) = " +
+        i.deserialize(i.serialize(null)));
+  }
+
+  public static void main(String[] args) throws Exception {
+    IntExample e = new IntExample();
+    e.lengthExamples();
+    e.serializationExamples();
+  }
+}
diff --git a/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/StringExample.java b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/StringExample.java
new file mode 100644
index 0000000..5a6c9e2
--- /dev/null
+++ b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/StringExample.java
@@ -0,0 +1,114 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Examples of using the String-like RowKey types.
+ */
+public class StringExample {
+
+  /**
+   * Simple examples showing serialization lengths with TextRowKey.
+   */
+  public void lengthExamples() throws Exception {
+    TextRowKey i = new TextRowKey();
+
+    System.out.println("serialize(hello) length - " +
+        i.serialize(new Text("hello")).length);
+    System.out.println("serialize(null) length - " + i.serialize(null).length);
+    System.out.println("serialize('') length - " +
+        i.serialize(new Text("")).length);
+    System.out.println("serialize(foobar) length - " +
+        i.serialize(new Text("foobar")).length);
+
+    i.setOrder(Order.DESCENDING);
+    System.out.println("descending serialize (null) - length " +
+        i.serialize(null).length);
+    System.out.println("descending serialize (hello) - length " +
+        i.serialize(new Text("hello")).length);
+    System.out.println("descending serialize ('') - length " +
+        i.serialize(new Text("")).length);
+  }
+
+  /**
+   * Simple examples showing serialization tests with StringRowKey.
+   */
+  public void serializationExamples() throws Exception {
+    StringRowKey l = new StringRowKey();
+    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
+    byte[] b;
+
+    /* Serialize and deserialize into an ImmutableBytesWritable */
+    b = new byte[l.getSerializedLength("hello")];
+    buffer.set(b);
+    l.serialize("hello", buffer);
+    /* Reset the buffer before reading it back */
+    buffer.set(b);
+    System.out.println("deserialize(serialize(hello)) = " +
+        l.deserialize(buffer));
+
+    /* Serialize and deserialize into a byte array (descending sort). */
+    l.setOrder(Order.DESCENDING);
+    System.out.println("deserialize(serialize('')) = " +
+        l.deserialize(l.serialize("")));
+
+    /* Serialize and deserialize NULL into a byte array */
+    System.out.println("deserialize(serialize(NULL)) = " +
+        l.deserialize(l.serialize(null)));
+  }
+
+  /**
+   * Demonstrates serialization where termination is necessary.
+   */
+  public void mustTerminateExamples() throws Exception {
+    UTF8RowKey u = new UTF8RowKey();
+
+    System.out.println("length(serialize(foobar)) = " +
+        u.serialize(Bytes.toBytes("foobar")).length);
+    System.out.println("deserialize(serialize(foobar)) = " +
+        Bytes.toString((byte[])u.deserialize(u.serialize(
+            Bytes.toBytes("foobar")))));
+
+    System.out.println("length(serialize(null)) = " + u.serialize(null).length);
+    System.out.println("deserialize(serialize(null)) = " +
+        u.deserialize(u.serialize(null)));
+
+    u.setTermination(Termination.MUST);
+    System.out.println("mustTerminate length(serialize(foobar)) = " +
+        u.serialize(Bytes.toBytes("foobar")).length);
+    System.out.println("mustTerminate - deserialize(serialize(foobar)) = " +
+        Bytes.toString((byte[])u.deserialize(u.serialize(
+            Bytes.toBytes("foobar")))));
+
+    System.out.println("mustTerminate length(serialize(null)) = " +
+        u.serialize(null).length);
+    System.out.println("mustTerminate deserialize(serialize(null)) = " +
+        u.deserialize(u.serialize(null)));
+  }
+
+  public static void main(String[] args) throws Exception {
+    StringExample e = new StringExample();
+    e.lengthExamples();
+    e.serializationExamples();
+    e.mustTerminateExamples();
+  }
+}
diff --git a/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/StructExample.java b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/StructExample.java
new file mode 100644
index 0000000..5c06d74
--- /dev/null
+++ b/hbase-examples/src/main/java/org/apache/hadoop/hbase/util/orderly/StructExample.java
@@ -0,0 +1,195 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.math.BigDecimal;
+
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/**
+ * Examples of using the {@link StructRowKey} and related utilities for
+ * serialization of compound types.
+ */
+public class StructExample {
+  BigDecimalRowKey bd;
+  DoubleRowKey d;
+  FixedUnsignedLongRowKey ul;
+  StringRowKey s;
+  IntegerRowKey i;
+
+  public StructExample() {
+    bd = new BigDecimalRowKey();
+    ul = new FixedUnsignedLongRowKey();
+    s = new StringRowKey();
+    d = new DoubleRowKey();
+    i = new IntegerRowKey();
+  }
+
+  /* Recursively pretty-print a deserialized struct (an Object[]),
+   * indenting nested structs with the given prefix.
+   */
+  void printStruct(Object obj, String prefix) {
+    Object[] o = (Object[]) obj;
+    if (o == null) {
+      System.out.println(prefix + "Struct: NULL");
+      return;
+    }
+
+    System.out.println(prefix + "Struct: ");
+    int pos = 0;
+    for (Object field : o) {
+      if (field instanceof Object[]) {
+        System.out.println(prefix + " Field " + pos++ + " nested: ");
+        printStruct(field, prefix + "\t");
+      } else {
+        System.out.println(prefix + " Field " + pos++ + " = " + field);
+      }
+    }
+  }
+
+  void printStruct(Object obj) { printStruct(obj, ""); }
+
+  /**
+   * Simple examples showing serialization lengths with StructRowKey.
+   */
+  public void structLengthExample() throws Exception {
+    StructRowKey fin = new StructRowKey(new RowKey[] { i, bd, s });
+    System.out.println("struct(null) length - " +
+        fin.serialize(new Object[] { null, null, null }).length);
+
+    /* Show descending sort */
+    fin.setOrder(Order.DESCENDING);
+    System.out.println("struct DESC (null) length - " +
+        fin.serialize(new Object[] { null, null, null }).length);
+
+    /* Back to ascending */
+    fin.setOrder(Order.ASCENDING);
+    System.out.println("struct (293, 2934, hi) length - " +
+        fin.serialize(new Object[] { 293, new BigDecimal("2934"), "hi" }).length);
+    System.out.println("struct (293, 2934, null) length - " +
+        fin.serialize(new Object[] { 293, new BigDecimal("2934"), null }).length);
+
+    /* Force termination */
+    fin.setTermination(Termination.MUST);
+    System.out.println("mustTerminate struct (293, 2934, hi) length - " +
+        fin.serialize(new Object[] { 293, new BigDecimal("2934"), "hi" }).length);
+    System.out.println("mustTerminate struct (293, 2934, null) length - " +
+        fin.serialize(new Object[] { 293, new BigDecimal("2934"), null }).length);
+    fin.setTermination(Termination.SHOULD_NOT);
+
+    fin.setOrder(Order.DESCENDING);
+    System.out.println("struct DESC (293, 2934, hi) length - " +
+        fin.serialize(new Object[] { 293, new BigDecimal("2934"), "hi" }).length);
+    System.out.println("struct DESC (293, 2934, null) length - " +
+        fin.serialize(new Object[] { 293, new BigDecimal("2934"), null }).length);
+    fin.setOrder(Order.ASCENDING);
+  }
+
+  /**
+   * Simple examples showing serialization of StructRowKey and (un)marshaling
+   * to and from byte[] and ImmutableBytesWritable.
+   */
+  public void structSerializationExample() throws Exception {
+    StructRowKey r = new StructRowKey(new RowKey[] { s, bd });
+
+    /* Serialize, deserialize using byte array */
+    System.out.println("deserialize(serialize(foobarbaz, 3.14159e102))");
+    printStruct(r.deserialize(r.serialize(new Object[] { "foobarbaz", new
+        BigDecimal(3.14159e102)})));
+
+    /* Serialize, deserialize using ImmutableBytesWritable */
+    ImmutableBytesWritable buffer = new ImmutableBytesWritable();
+    Object[] o = new Object[] { "helloworld", null };
+    byte[] b = new byte[r.getSerializedLength(o)];
+    buffer.set(b);
+    r.serialize(o, buffer);
+    buffer.set(b);
+    System.out.println("deserialize(serialize(helloworld, null))");
+    printStruct(r.deserialize(buffer));
+  }
+
+  /**
+   * Simple example of using a prefix match. Store a row using (string, double,
+   * bigdecimal), but retrieve it using (string, double) and just (string).
+   */
+  public void prefixExample() throws Exception {
+    StructRowKey rk = new StructRowKey(new RowKey[] { s, d, bd }),
+        prefix1 = new StructRowKey(new RowKey[] { s, d }),
+        prefix2 = new StructRowKey(new RowKey[] { s });
+
+    byte[] b = rk.serialize(new Object[] { "hello", 3.14159,
+        new BigDecimal("0.93e-102") });
+
+    System.out.println("Deserialize first two fields of " +
+        "(hello, 3.14159, 0.93e-102)");
+    printStruct(prefix1.deserialize(b));
+
+    System.out.println("Deserialize first field of " +
+        "(hello, 3.14159, 0.93e-102)");
+    printStruct(prefix2.deserialize(b));
+
+    System.out.println("Deserialize all fields of " +
+        "(hello, 3.14159, 0.93e-102)");
+    printStruct(rk.deserialize(b));
+  }
+
+  /**
+   * Examples of using the {@link StructBuilder} and {@link StructIterator}
+   * utilities.
+   */
+  public void builderAndIteratorExample() throws Exception {
+    StructRowKey rk = new StructBuilder().add(s).add(d).add(ul).toRowKey();
+    Object[] o = new Object[] { "hello", 3.14159, 17L };
+    StructIterator iterator = rk.iterateOver(rk.serialize(o)).iterator();
+
+    /* Use the deserialize, skip methods to selectively deserialize objects */
+    System.out.println("Printing fields 1, 3 of (hello, 3.14159, 17)");
+    System.out.println(iterator.deserialize());
+    iterator.skip();
+    System.out.println(iterator.deserialize());
+
+    /* Just treat StructRowKey as an iterable -- iterates over all row key
+     * fields, deserializing each object in succession
+     */
+    System.out.println("Printing all fields of (hello, 3.14159, 17)");
+    for (Object field : rk)
+      System.out.println(field);
+  }
+
+  /**
+   * An example of nesting a struct within a struct.
+   */
+  public void nestedStructExample() throws Exception {
+    StructBuilder b = new StructBuilder();
+    StructRowKey n = b.add(ul).add(bd).toRowKey(),
+        rk = b.reset().add(s).add(n).add(i).toRowKey();
+    System.out.println("Serializing (outerString, (17, 940.2e-87), 42)");
+    Object[] o = new Object[] { "outerString",
+        new Object[] { 17L, new BigDecimal("940.2e-87") }, 42 };
+    byte[] a = rk.serialize(o);
+    System.out.println("Length " + a.length);
+    printStruct(rk.deserialize(a));
+  }
+
+  public static void main(String[] args) throws Exception {
+    StructExample e = new StructExample();
+    e.structLengthExample();
+    e.structSerializationExample();
+    e.prefixExample();
+    e.builderAndIteratorExample();
+    e.nestedStructExample();
+  }
+}