From 1e2176fb4c04874fdc44a7967c8ba2279513a519 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Tue, 9 Jul 2013 08:47:43 -0700 Subject: [PATCH] HBASE-8201 OrderedBytes provides order-preserving serialization OrderedBytes provides a serialization format in which the resulting byte[] retains the same sort order as the natural types. Serialized formats can be inspected and decoded without forward knowledge of their content. Implementations are provided for integer and floating point numbers with 32- and 64-bits of precision, numeric values of arbitrary precision, Strings, and byte[]s. Utility methods for counting and skipping encoded entries are also provided. The encoding format is modeled after the SQLite4 key encoding format. Implementations of variable-length encodings are very similar. Fixed-width encodings are modeled after the fixed-width formats provided by the Orderly library. Javadocs on the OrderedBytes class describe the encoding format in detail. See http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki for additional context. Notable deviations from the sqlite4 spec include: - Different header byte values. This is to give users more room to place their own, custom encoding extensions as they see fit. - Blob-last is modified to include a termination byte of 0x00. This is necessary in order to maintain reverse sort order of empty values. This may make the two Blob encodings redundant. - Support for fixed-length integer and float encodings. 
--- .../org/apache/hadoop/hbase/util/HNumeric.java | 187 +++ .../java/org/apache/hadoop/hbase/util/Order.java | 73 ++ .../org/apache/hadoop/hbase/util/OrderedBytes.java | 1202 ++++++++++++++++++++ .../apache/hadoop/hbase/util/TestOrderedBytes.java | 748 ++++++++++++ 4 files changed, 2210 insertions(+) create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/HNumeric.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HNumeric.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HNumeric.java new file mode 100644 index 0000000..b8738bb --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HNumeric.java @@ -0,0 +1,187 @@ +package org.apache.hadoop.hbase.util; + +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * HNumeric represents a numeric value for use with {@link OrderedBytes}. This + * is necessary because {@link BigDecimal} does not support a representation + * for NaN or +/-Inf. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class HNumeric extends Number { + private static final long serialVersionUID = -4488167747731287844L; + + /** + * The context used for numerical operations. 
+ */ + public static final MathContext MATH_CONTEXT = new MathContext(31, RoundingMode.HALF_UP); + + public static final HNumeric NaN = new HNumeric(Double.NaN); + public static final HNumeric NEGATIVE_INFINITY = new HNumeric(Double.NEGATIVE_INFINITY); + public static final HNumeric ZERO = new HNumeric(0.0); + public static final HNumeric POSITIVE_INFINITY = new HNumeric(Double.POSITIVE_INFINITY); + + private final boolean isR; /* true when the value is held in r (double) */ + private final boolean isZ; /* true when the value is held in z (long) */ + private final long z; + private final double r; + private final BigDecimal bd; /* non-null only when neither isR nor isZ is set */ + + private transient int hashCode = 0; + + /** + * Create an HNumeric instance over a double. + */ + public HNumeric(double val) { + isR = true; + r = val; + isZ = false; + z = 0; + bd = null; + } + + /** + * Create an HNumeric instance over a long. + */ + public HNumeric(long val) { + isZ = true; + z = val; + isR = false; + r = 0.0; + bd = null; + } + + /** + * Create an HNumeric instance over a BigDecimal. Stored as a long when exactly representable. + */ + public HNumeric(BigDecimal val) { + if (null == val) throw new NullPointerException(); + + // see if this can be a long instead + boolean isLong = false; + long lng = 0; + try { + lng = val.longValueExact(); + isLong = true; + } catch (ArithmeticException e) { /* not exactly representable as a long; isLong stays false */ + } + + if (isLong) { + isZ = true; + z = lng; + isR = false; + r = 0.0; + bd = null; + } else { + // doesn't fit in a long, fall back to BD rounded to MATH_CONTEXT + bd = val.round(MATH_CONTEXT); + isZ = false; + isR = false; + z = 0; + r = 0.0; + } + } + + /** + * Returns true if the Number is an Integer and + * fits in a long, false otherwise. + */ + public boolean isInteger() { + return isZ; + } + + /** + * Returns true if the value is stored as a double, i.e. + * this instance was created from a double; false otherwise. + */ + public boolean isReal() { + return isR; + } + + /** + * Returns true if the Number is infinitely large + * in magnitude, false otherwise. 
+ */ + public boolean isInfinite() { + return isR && Double.isInfinite(r); + } + + /** + * Returns true if the Number is a Not-a-Number + * (NaN) value, false otherwise. + */ + public boolean isNaN() { + return isR && Double.isNaN(r); + } + + /** + * Retrieve the value as a BigDecimal. This will silently + * promote a double or long to a + * BigDecimal when possible, so use it only if a primitive + * value is not available. Check availability using {@link #isInteger()} and + * {@link #isReal()}. + * @throws NumberFormatException if the + * Number is infinite or NaN. + */ + public BigDecimal exactValue() { + return null == bd ? isR ? BigDecimal.valueOf(r) : BigDecimal.valueOf(z) : bd; + } + + @Override + public double doubleValue() { + return isReal() ? r : isInteger() ? (double) z : bd.doubleValue(); + } + + @Override + public int intValue() { + return isInteger() ? (int) z : isReal() ? (int) r : bd.intValue(); + } + + @Override + public long longValue() { + return isInteger() ? z : isReal() ? (long) r : bd.longValue(); + } + + @Override + public float floatValue() { + return isReal() ? (float) r : isInteger() ? (float) z : bd.floatValue(); + } + + @Override + public String toString() { + return isReal() ? Double.toString(r) : isInteger() ? 
Long.toString(z) : bd.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (null == o) return false; + if (!(o instanceof HNumeric)) return false; + HNumeric that = (HNumeric) o; + if (this.isInteger() && that.isInteger()) return this.longValue() == that.longValue(); + if (this.isReal() && that.isReal()) return this.doubleValue() == that.doubleValue(); /* primitive ==, so two distinct NaN instances are never equal */ + return 0 == this.exactValue().compareTo(that.exactValue()); /* NOTE(review): mixed representations land here; exactValue() is documented to throw NumberFormatException for NaN or infinite values - confirm equals() cannot throw */ + } + + @Override + public int hashCode() { + if (0 != hashCode) return hashCode; /* lazily cached; 0 means not yet computed */ + int result = 1; /* NOTE(review): hashed per representation, while equals() also compares across representations - confirm the equals/hashCode contract holds */ + if (isInteger()) { + result = result * 23 + (int) (z ^ (z >>> 32)); + } else if (isReal()) { + long bits = Double.doubleToLongBits(r); + result = result * 13 + (int) (bits ^ (bits >>> 32)); + } else { + result = result * 17 + bd.hashCode(); + } + hashCode = result; + return result; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java new file mode 100644 index 0000000..b00fa11 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.util; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * The sort order of a byte[] or HDataType instance, + * either ASCENDING or DESCENDING. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum Order { + ASCENDING, DESCENDING; + + static final byte mask = (byte) 0xff; /* XOR-ing a byte with this mask inverts every bit */ + + /** + * Returns the adjusted trichotomous value according to the ordering imposed + * by this Order: cmp unchanged for ASCENDING, negated for DESCENDING. + */ + public int cmp(int cmp) { + return cmp * (this == ASCENDING ? 1 : -1); + } + + /** + * Apply order to the byte val: unchanged for ASCENDING, every bit inverted for DESCENDING. + */ + public byte apply(byte val) { + return (byte) (this == ASCENDING ? val : val ^ mask); + } + + /** + * Apply order to the whole byte array val in place; a no-op unless this is DESCENDING. + */ + public void apply(byte[] val) { + if (this != DESCENDING) return; + for (int i = 0; i < val.length; i++) { + val[i] ^= mask; + } + } + + /** + * Apply order in place to length bytes of val starting at offset; a no-op unless DESCENDING. + */ + public void apply(byte[] val, int offset, int length) { + if (this != DESCENDING) return; + for (int i = 0; i < length; i++) { + val[offset + i] ^= mask; + } + } + + @Override + public String toString() { + return this == ASCENDING ? "asc" : "dsc"; + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java new file mode 100644 index 0000000..d2ec1de --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java @@ -0,0 +1,1202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Utility class that handles ordered byte arrays. That is, unlike + * {@link Bytes}, these methods produce byte arrays which maintain the sort + * order of the original values. + *

Encoding Format summary

+ *

+ * Each value is encoded as one or more bytes. The first byte of the encoding, + * its meaning, and a terse description of the bytes that follow is given by + * the following table: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Content Type | Encoding
NULL | 0x05
negative infinity | 0x07
negative large | 0x08, ~E, ~M
negative medium | 0x13-E, ~M
negative small | 0x14, -E, ~M
zero | 0x15
positive small | 0x16, ~-E, M
positive medium | 0x17+E, M
positive large | 0x22, E, M
positive infinity | 0x23
NaN | 0x25
fixed-length 32-bit integer | 0x27, I
fixed-length 64-bit integer | 0x28, I
fixed-length 32-bit float | 0x30, F
fixed-length 64-bit float | 0x31, F
text | 0x33, T
binary | 0x35, B
final binary | 0x36, X
+ *

+ * + *

Null Encoding

+ *

+ * Each value that is a NULL encodes as a single byte of 0x05. Since every + * other value encoding begins with a byte greater than 0x05, this forces NULL + * values to sort first. + *

+ *

Text Encoding

+ *

+ * Each text value begins with a single byte of 0x33 and ends with a single + * byte of 0x00. There are zero or more intervening bytes that encode the text + * value. The intervening bytes are chosen so that the encoding will sort in + * the desired collating order. The intervening bytes may not contain a 0x00 + * character; the only 0x00 byte allowed in a text encoding is the final byte. + *

+ *

+ * The text encoding ends in 0x00 in order to ensure that when there are two + * strings where one is a prefix of the other that the shorter string will + * sort first. + *

+ *

Binary Encoding

+ *

+ * The encoding of binary fields is different depending on whether or not + * the value to be encoded is the last value (the right-most value) in the + * key. + *

+ *

+ * Each value that is BINARY that is not the last value of the key begins with + * a single byte of 0x35 and ends with a single byte of 0x00. There are zero + * or more intervening bytes that encode the binary value. None of the + * intervening bytes may be zero, as this conflicts with the termination + * marker. Thus, each of the intervening bytes contains 7 bits of blob content + * with a 1 in the high-order bit (the 0x80 bit). The final byte before the + * 0x00 contains any left-over bits of the blob content. + *

+ *

+ * When the very last value of a key is BINARY, then it is encoded as a single + * byte of 0x36 and is followed by a byte-for-byte copy of the BINARY value. + * This alternative encoding is more efficient, but it only works if there are + * no subsequent values in the key, since there is no termination mark on the + * BLOB being encoded. + *

+ *

Variable-length Numeric Encoding

+ *

+ * Numeric values must be coded so as to sort in numeric order. We assume that + * numeric values can be both integer and floating point values. The wrapper + * class {@link HNumeric} is used to smooth over values decoded using this + * scheme. + *

+ *

+ * Simplest cases first: If the numeric value is a NaN, then the encoding is a + * single byte of 0x25. This causes NaN values to sort after every other + * numeric value. + *

+ *

+ * If the numeric value is a negative infinity then the encoding is a single + * byte of 0x07. Since every other numeric value, including NaN (0x25), has a larger + * initial byte, this encoding ensures that negative infinity will sort prior + * to every other numeric value. + *

+ *

+ * If the numeric value is a positive infinity then the encoding is a single + * byte of 0x23. Every other numeric value encoding except NaN (0x25) begins + * with a smaller byte, so positive infinity sorts last among non-NaN numeric + * values. Both 0x23 and 0x25 are smaller than 0x33, the initial byte of a text + * value, ensuring that every numeric value sorts before every text value. + *

+ *

+ * If the numeric value is exactly zero then it is encoded as a single byte of + * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14 + * and finite positive values will have initial bytes of 0x16 through 0x22. + *

+ *

+ * For all numeric values, we compute a mantissa M and an exponent E. The + * mantissa is a base-100 representation of the value. The exponent E + * determines where to put the decimal point. + *

+ *

+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X≥0 and X≤99) then the byte value will + * be 2*X+1 for every byte of the mantissa, except for the last byte which + * will be 2*X+0. The mantissa must be the minimum number of bytes necessary + * to represent the value; trailing X==0 digits are omitted. This means that + * the mantissa will never contain a byte with the value 0x00. + *

+ *

+ * If we assume all digits of the mantissa occur to the right of the decimal + * point, then the exponent E is the power of one hundred by which one must + * multiply the mantissa to recover the original value. + *

+ *

+ * Values are classified as large, medium, or small according to the value of + * E. If E is 11 or more, the value is large. For E between 0 and 10, the + * value is medium. For E less than zero, the value is small. + *

+ *

+ * Large positive values are encoded as a single byte 0x22 followed by E as a + * varint and then M. Medium positive values are a single byte of 0x17+E + * followed by M. Small positive values are encoded as a single byte 0x16 + * followed by the ones-complement of the varint for -E followed by M. + *

+ *

+ * Small negative values are encoded as a single byte 0x14 followed by -E as a + * varint and then the ones-complement of M. Medium negative values are + * encoded as a byte 0x13-E followed by the ones-complement of M. Large + * negative values consist of the single byte 0x08 followed by the + * ones-complement of the varint encoding of E followed by the ones-complement + * of M. + *

+ *

Fixed-length Integer Encoding

+ *

+ * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte + * format. All 8-byte integers are serialized to the equivalent 9-byte format. + * Serialization is performed by writing a header byte, inverting the integer + * sign bit and writing the resulting bytes to the byte array in big endian + * order. + *

+ *

Fixed-length Floating Point Encoding

+ *

+ * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte + * encoding format, respectively. The format is identical, save for the + * precision respected in each step of the operation. + *

+ * This format ensures the following total ordering of floating point values: + * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... < + * -Float.MIN_VALUE < -0.0 < +0.0 < Float.MIN_VALUE < ... < + * Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN + *

+ *

+ * Floating point numbers are encoded as specified in IEEE 754. A 32-bit + * single precision float consists of a sign bit, 8-bit unsigned exponent + * encoded in offset-127 notation, and a 23-bit significand. The format is + * described further in the Single Precision + * Floating Point Wikipedia page + *

+ *

+ * The value of a normal float is (-1)^(sign bit) × + * 2^(exponent - 127) × 1.significand + *

+ *

+ * The IEEE 754 floating point format already preserves sort ordering for + * positive floating point numbers when the raw bytes are compared in most + * significant byte order. This is discussed further at + * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + *

+ *

+ * Thus, we need only ensure that negative numbers sort in the exact + * opposite order as positive numbers (so that say, negative infinity is less + * than negative 1), and that all negative numbers compare less than any + * positive number. To accomplish this, we invert the sign bit of all floating + * point numbers, and we also invert the exponent and significand bits if the + * floating point number was negative. + *

+ *

+ * More specifically, we first store the floating point bits into a 32-bit int + * j using {@link Float#floatToIntBits}. This method collapses + * all NaNs into a single, canonical NaN value but otherwise leaves the bits + * unchanged. We then compute + *

+ * + *
+ * j ^= (j >> (Integer.SIZE - 1)) | Integer.MIN_VALUE
+ * 
+ *

+ * which inverts the sign bit and XOR's all other bits with the sign bit + * itself. Comparing the raw bytes of j in most significant byte + * order is equivalent to performing a single precision floating point + * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't + * compare equal to anything when performing floating point comparisons). + *

+ *

+ * The resulting integer is then converted into a byte array by serializing + * the integer one byte at a time in most significant byte order. The + * serialized integer is prefixed by a single header byte. All serialized + * values are 5 bytes in length. + *

+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class OrderedBytes { + + private static final Charset UTF8 = Charset.forName("UTF-8"); + + // constants used for numeric {en,de}coding + private static final BigDecimal NEG_ONE = BigDecimal.ONE.negate(); + private static final BigDecimal E8 = BigDecimal.valueOf(1e8); + private static final BigDecimal E32 = BigDecimal.valueOf(1e32); + private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2); + private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10); + + /** + * Perform unsigned comparison between two long values. Conforms to the same + * interface as {@link Comparator#compare(Object, Object)}. + */ + private static int unsignedCmp(long x1, long x2) { + int cmp; + if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0; + // invert the signed result when exactly one value is negative + if ((x1 < 0) != (x2 < 0)) return -cmp; + return cmp; + } + + /** + * Write a 32-bit unsigned integer to dst as 4 big-endian + * bytes. + * @return number of bytes written. + */ + private static int putUint32(ByteBuffer dst, int val) { + dst.put((byte) (val >>> 24)) + .put((byte) (val >>> 16)) + .put((byte) (val >>> 8)) + .put((byte) val); + return 4; + } + + /** + * Encode an unsigned 64-bit integer val into dst. + * Complement the encoded value when comp is true. Returns the bytes written (1 to 9). + */ + @VisibleForTesting + static int putVaruint64(ByteBuffer dst, long val, boolean comp) { + int w, y, start = dst.position(); + byte[] a = dst.array(); /* NOTE(review): indexed by position() below, without arrayOffset() - confirm */ + Order ord = comp ? 
Order.DESCENDING : Order.ASCENDING; + if (-1 == unsignedCmp(val, 241L)) { /* 0..240: the value is its own single byte */ + dst.put((byte) val); + ord.apply(a, start, 1); + return 1; + } + if (-1 == unsignedCmp(val, 2288L)) { /* 241..2287: two bytes, the first in 241..248 */ + y = (int) (val - 240); + dst.put((byte) (y / 256 + 241)) + .put((byte) (y % 256)); + ord.apply(a, start, 2); + return 2; + } + if (-1 == unsignedCmp(val, 67824L)) { /* 2288..67823: marker 249, then two bytes */ + y = (int) (val - 2288); + dst.put((byte) 249) + .put((byte) (y / 256)) + .put((byte) (y % 256)); + ord.apply(a, start, 3); + return 3; + } + y = (int) (val & 0xffffffff); /* low 32 bits */ + w = (int) (val >>> 32); /* high 32 bits */ + if (w == 0) { + if (-1 == unsignedCmp(y, 16777216L)) { + dst.put((byte) 250) + .put((byte) (y >>> 16)) + .put((byte) (y >>> 8)) + .put((byte) y); + ord.apply(a, start, 4); + return 4; + } + dst.put((byte) 251); + putUint32(dst, y); + ord.apply(a, start, 5); + return 5; + } + if (-1 == unsignedCmp(w, 256L)) { + dst.put((byte) 252) + .put((byte) w); + putUint32(dst, y); + ord.apply(a, start, 6); + return 6; + } + if (-1 == unsignedCmp(w, 65536L)) { + dst.put((byte) 253) + .put((byte) (w >>> 8)) + .put((byte) w); + putUint32(dst, y); + ord.apply(a, start, 7); + return 7; + } + if (-1 == unsignedCmp(w, 16777216L)) { + dst.put((byte) 254) + .put((byte) (w >>> 16)) + .put((byte) (w >>> 8)) + .put((byte) w); + putUint32(dst, y); + ord.apply(a, start, 8); + return 8; + } + dst.put((byte) 255); + putUint32(dst, w); + putUint32(dst, y); + ord.apply(a, start, 9); + return 9; + } + + /** + * Inspect an encoded varu64 for its encoded length. Does not modify + * src's state. + * @param src source buffer + * @param comp if true, parse the complement of the value. + * @return number of bytes consumed by this value + */ + @VisibleForTesting + static int lengthVaru64(ByteBuffer src, boolean comp) { + byte[] a = src.array(); + int i = src.position(); + int a0 = (comp ? 
a[i] ^ Order.mask : a[i]) & 0xff; + if (a0 <= 240) return 1; + if (a0 >= 241 && a0 <= 248) return 2; + if (a0 == 249) return 3; + if (a0 == 250) return 4; + if (a0 == 251) return 5; + if (a0 == 252) return 6; + if (a0 == 253) return 7; + if (a0 == 254) return 8; + if (a0 == 255) return 9; /* a0 is masked to 0..255, so every value has returned by here */ + throw new IllegalArgumentException("unexpected value in first byte: 0x" + + Long.toHexString(a[i])); + } + + /** + * Decode a sequence of bytes in buff as an unsigned 64-bit + * integer. Complement the encoded value when comp is true. + */ + @VisibleForTesting + static long getVaruint64(ByteBuffer buff, boolean comp) { + assert buff.remaining() >= lengthVaru64(buff, comp); + long ret; + byte x = buff.get(); + int a0 = (comp ? x ^ Order.mask : x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8; + if (-1 == unsignedCmp(a0, 241)) { + return a0; + } + x = buff.get(); + a1 = (comp ? x ^ Order.mask : x) & 0xff; + if (-1 == unsignedCmp(a0, 249)) { + return (a0 - 241) * 256 + a1 + 240; + } + x = buff.get(); + a2 = (comp ? x ^ Order.mask : x) & 0xff; + if (a0 == 249) { + return 2288 + 256 * a1 + a2; + } + x = buff.get(); + a3 = (comp ? x ^ Order.mask : x) & 0xff; + if (a0 == 250) { + return (a1 << 16) | (a2 << 8) | a3; + } + x = buff.get(); + a4 = (comp ? x ^ Order.mask : x) & 0xff; + ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4; + if (a0 == 251) { + return ret; + } + x = buff.get(); + a5 = (comp ? x ^ Order.mask : x) & 0xff; + if (a0 == 252) { + return (ret << 8) | a5; + } + x = buff.get(); + a6 = (comp ? x ^ Order.mask : x) & 0xff; + if (a0 == 253) { + return (ret << 16) | (a5 << 8) | a6; + } + x = buff.get(); + a7 = (comp ? x ^ Order.mask : x) & 0xff; + if (a0 == 254) { + return (ret << 24) | (a5 << 16) | (a6 << 8) | a7; + } + x = buff.get(); + a8 = (comp ? x ^ Order.mask : x) & 0xff; + return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8; + } + + /** + * Skip buff over the bytes of one encoded varuint64. 
+ */ + static void skipVaruint64(ByteBuffer buff, boolean comp) { + buff.position(buff.position() + lengthVaru64(buff, comp)); + } + + /** + * Read significand digits from buff according to the magnitude + * of e. + * @param buff The source from which to read encoded digits. + * @param e The magnitude of the first digit read. + * @param comp Treat encoded bytes as complements when comp is true. + * @return The decoded value. + */ + private static BigDecimal decodeSignificand(ByteBuffer buff, int e, boolean comp) { + // TODO: can this be made faster? + byte[] a = buff.array(); + BigDecimal m = BigDecimal.ZERO; + e--; + for (int i = buff.position();; i++) { + // base-100 digits are encoded as val * 2 + 1 except for the termination digit. + m = m.add( // m += + new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ e * [decoded digit] + BigDecimal.valueOf(((comp ? a[i] ^ Order.mask : a[i]) & 0xff) / 2))); + e--; + // detect termination digit: the only mantissa byte whose low bit is clear + if (((comp ? a[i] ^ Order.mask : a[i]) & 1) == 0) { + buff.position(i + 1); + break; + } + } + return m; + } + + /** + * Skip buff past the significand, through the first decoded byte whose low bit is clear. + */ + private static void skipSignificand(ByteBuffer buff, boolean comp) { + byte[] a = buff.array(); + for (int i = buff.position();; i++) { + if (((comp ? a[i] ^ Order.mask : a[i]) & 1) == 0) { + buff.position(i + 1); + break; + } + } + } + + /** + * Encode the small positive floating point number val using + * the key encoding. The caller guarantees that val will be + * less than 1.0 and greater than 0.0. + *

+ * A floating point value is encoded as an integer exponent E + * and a mantissa M. The original value is equal to + * (M * 100^E). E is set to the smallest value + * possible without making M greater than or equal to 1.0. + *

+ *

+ * For this routine, E will always be zero or negative, since + * the original value is less than one. The encoding written by this routine + * is the ones-complement of the varint of the negative of E + * followed by the mantissa: + * + *

+   *   Encoding:   ~-E  M
+   * 
+ * + *

+ * @param buff The destination to which encoded digits are written. + * @param val The value to encode. + * @param ecomp Write the compliment of e to buff + * when ecomp is true. + * @param mcomp Write the compliment of M to buff + * when mcomp is true. + */ + private static void encodeNumericSmall(ByteBuffer buff, BigDecimal val, boolean ecomp, + boolean mcomp) { + // TODO: can this be done faster? + // assert 0.0 < val < 1.0 + assert BigDecimal.ZERO.compareTo(val) < 0 && BigDecimal.ONE.compareTo(val) > 0; + int e = 0, d, startM; + Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING; + while (val.compareTo(EN10) < 0) { val = val.movePointRight(8); e += 4; } + while (val.compareTo(EN2) < 0) { val = val.movePointRight(2); e++; } + putVaruint64(buff, e, ecomp); /* e holds the non-negative magnitude of the exponent */ + startM = buff.position(); + for (int i = 0; i < 18 && val.compareTo(BigDecimal.ZERO) != 0; i++) { /* at most 18 base-100 digits */ + val = val.movePointRight(2); + d = val.intValue(); + buff.put((byte) ((2 * d + 1) & 0xff)); + val = val.subtract(BigDecimal.valueOf(d)); + } + buff.array()[buff.position() - 1] &= 0xfe; /* clear the low bit of the last digit byte: marks the end of M */ + ord.apply(buff.array(), startM, buff.position() - startM); + } + + /** + * Encode the large positive floating point number val using + * the key encoding. The caller guarantees that val will be + * finite and greater than or equal to 1.0. + *

+ * A floating point value is encoded as an integer exponent E + * and a mantissa M. The original value is equal to + * (M * 100^E). E is set to the smallest value + * possible without making M greater than or equal to 1.0. + *

+ *

+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X>=0 and + * X<=99) then the byte value will be 2*X+1 for + * every byte of the mantissa, except for the last byte which will be + * 2*X+0. The mantissa must be the minimum number of bytes + * necessary to represent the value; trailing X==0 digits are + * omitted. This means that the mantissa will never contain a byte with the + * value 0x00. + *

+ *

+ * If E > 10, then this routine writes E as a + * varint followed by the mantissa as described above. Otherwise, if + * E <= 10, this routine only writes the mantissa and leaves + * the E value to be encoded as part of the opening byte of the + * field by the calling function. + * + *

+   *   Encoding:  M       (if E<=10)
+   *              E M     (if E>10)
+   * 
+ *

+ *

+ * This routine returns the value of E. + *

+ * @param buff The destination to which encoded digits are written. + * @param val The value to encode. + * @param ecomp Write the compliment of e to buff + * when ecomp is true. + * @param mcomp Write the compliment of M to buff + * when mcomp is true. + * @return E(xponent) in base-100. + */ + private static int encodeNumericLarge(ByteBuffer buff, BigDecimal val, boolean ecomp, + boolean mcomp) { + // TODO: can this be done faster? + // assert val >= 1.0 + assert BigDecimal.ONE.compareTo(val) <= 0; + int e = 0, d, startM; + Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING; + while (val.compareTo(E32) >= 0 && e <= 350) { val = val.movePointLeft(32); e +=16; } + while (val.compareTo(E8) >= 0 && e <= 350) { val = val.movePointLeft(8); e+= 4; } + while (val.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { val = val.movePointLeft(2); e++; } /* NOTE(review): the e <= 350 cap can end normalization while val is still >= 1 for very large inputs - confirm such values cannot reach here */ + if (e > 10) putVaruint64(buff, e, ecomp); /* for e <= 10 the caller folds e into the header byte */ + startM = buff.position(); + for (int i = 0; i < 18 && val.compareTo(BigDecimal.ZERO) != 0; i++) { /* at most 18 base-100 digits */ + val = val.movePointRight(2); + d = val.intValue(); + buff.put((byte) (2 * d + 1)); + val = val.subtract(BigDecimal.valueOf(d)); + } + buff.array()[buff.position() - 1] &= 0xfe; /* clear the low bit of the last digit byte: marks the end of M */ + ord.apply(buff.array(), startM, buff.position() - startM); + return e; + } + + /** + * Encode a numerical value using the variable-length encoding. 
+ */ + public static void encodeNumeric(ByteBuffer buff, long val, Order ord) { + int e, i, start = buff.position(); + if (val == 0) { + buff.put((byte) 0x15); /* Numeric zero */ + } else if (val <= -1) { + i = buff.position(); + buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */ + e = encodeNumericLarge(buff, BigDecimal.valueOf(val).negate(), true, true); + if (e <= 10) buff.put(i, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */ + } else { + i = buff.position(); + buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */ + e = encodeNumericLarge(buff, BigDecimal.valueOf(val), false, false); + if (e <= 10) buff.put(i, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */ + } + ord.apply(buff.array(), start, buff.position() - start); + } + + /** + * Encode a numerical value using the variable-length encoding. + */ + public static void encodeNumeric(ByteBuffer buff, double val, Order ord) { + int start = buff.position(); + if (Double.isNaN(val)) { + buff.put((byte) 0x25); /* NaN */ + ord.apply(buff.array(), start, buff.position() - start); + } else if (val == Double.NEGATIVE_INFINITY) { + buff.put((byte) 0x07); + ord.apply(buff.array(), start, buff.position() - start); + } else if (val == Double.POSITIVE_INFINITY) { + buff.put((byte) 0x23); + ord.apply(buff.array(), start, buff.position() - start); + } else if (val == 0.0) { + buff.put((byte) 0x15); /* Numeric zero */ + ord.apply(buff.array(), start, buff.position() - start); + } else { + encodeNumeric(buff, BigDecimal.valueOf(val), ord); + } + } + + /** + * Encode a numerical value using the variable-length encoding. 
+   * <p>
+   * A null value is written with encodeNull. Otherwise the header byte is
+   * chosen by magnitude: zero, at most -1, between -1 and 0, between 0 and
+   * 1, or at least 1.
+   * </p>
+   * @param buff The destination to which the encoded value is written.
+   * @param val The value to encode; may be null.
+   * @param ord The Order applied to the encoded bytes.
+   */
+  public static void encodeNumeric(ByteBuffer buff, BigDecimal val, Order ord) {
+    if (null == val) {
+      encodeNull(buff, ord);
+      return;
+    }
+    final int start = buff.position();
+    if (val.compareTo(BigDecimal.ZERO) == 0) {
+      buff.put((byte) 0x15); /* Numeric zero */
+    } else if (val.compareTo(NEG_ONE) <= 0) { // v <= -1.0
+      final int headerPos = buff.position();
+      buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */
+      final int exp = encodeNumericLarge(buff, val.negate(), true, true);
+      if (exp <= 10) {
+        buff.put(headerPos, (byte) (0x13 - exp)); /* Medium negative number: 0x13-E, ~M */
+      }
+    } else if (val.compareTo(BigDecimal.ZERO) < 0) { // -1.0 < v < 0.0
+      buff.put((byte) 0x14); /* Small negative number: 0x14, -E, ~M */
+      encodeNumericSmall(buff, val.negate(), false, true);
+    } else if (val.compareTo(BigDecimal.ONE) < 0) { // 0.0 < v < 1.0
+      buff.put((byte) 0x16); /* Small positive number: 0x16, ~-E, M */
+      encodeNumericSmall(buff, val, true, false);
+    } else { // v >= 1.0
+      final int headerPos = buff.position();
+      buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */
+      final int exp = encodeNumericLarge(buff, val, false, false);
+      if (exp <= 10) {
+        buff.put(headerPos, (byte) (0x17 + exp)); /* Medium positive number: 0x17+E, M */
+      }
+    }
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Encode a numerical value using the variable-length encoding. Dispatches
+   * to the long, double or BigDecimal overload according to what the
+   * HNumeric reports about itself; a null value is written with encodeNull.
+   *
+   * @param buff The destination to which the encoded value is written.
+   * @param val The value to encode; may be null.
+   * @param ord The Order applied to the encoded bytes.
+   */
+  public static void encodeNumeric(ByteBuffer buff, HNumeric val, Order ord) {
+    if (null == val) {
+      encodeNull(buff, ord);
+      return;
+    }
+    if (val.isInteger()) {
+      encodeNumeric(buff, val.longValue(), ord);
+      return;
+    }
+    if (val.isReal()) {
+      encodeNumeric(buff, val.doubleValue(), ord);
+      return;
+    }
+    encodeNumeric(buff, val.exactValue(), ord);
+  }
+
+  /**
+   * Decode a Numerical value from the variable-length encoding. The backing
+   * array is not modified through use of this method.
+   * <p>
+   * A header byte with its high bit set is treated as a descending-order
+   * encoding and is un-complemented before it is interpreted.
+   * </p>
+   * @param buff The source, positioned at the header byte of the value.
+   * @return the decoded value, or null when the header is the null marker
+   *         (0x05, or 0xfa in descending order).
+   * @throws IllegalArgumentException when the header byte is not a Numeric
+   *           header.
+   */
+  public static HNumeric decodeNumeric(ByteBuffer buff) {
+    byte header = buff.get();
+    if (header == 0x05 || header == (byte) 0xfa) {
+      return null;
+    }
+    final boolean dsc = header < 0;
+    if (dsc) {
+      header = (byte) ((header ^ Order.mask) & 0xff);
+    }
+
+    // Values that consist of the header byte alone.
+    if (header == 0x25) return HNumeric.NaN; /* NaN */
+    if (header == 0x07) return HNumeric.NEGATIVE_INFINITY; /* -inf */
+    if (header == 0x15) return HNumeric.ZERO; /* zero */
+    if (header == 0x23) return HNumeric.POSITIVE_INFINITY; /* +inf */
+
+    // Everything else is an exponent (explicit or folded into the header)
+    // followed by a significand.
+    final boolean negative;
+    final int exp;
+    if (header == 0x08) { /* Large negative number: 0x08, ~E, ~M */
+      negative = true;
+      exp = (int) getVaruint64(buff, !dsc);
+    } else if (header >= 0x09 && header <= 0x13) { /* Medium negative number: 0x13-E, ~M */
+      negative = true;
+      exp = 0x13 - header;
+    } else if (header == 0x14) { /* Small negative number: 0x14, -E, ~M */
+      negative = true;
+      exp = (int) -getVaruint64(buff, dsc);
+    } else if (header == 0x16) { /* Small positive number: 0x16, ~-E, M */
+      negative = false;
+      exp = (int) -getVaruint64(buff, !dsc);
+    } else if (header >= 0x17 && header <= 0x21) { /* Medium positive number: 0x17+E, M */
+      negative = false;
+      exp = header - 0x17;
+    } else if (header == 0x22) { /* Large positive number: 0x22, E, M */
+      negative = false;
+      exp = (int) getVaruint64(buff, dsc);
+    } else {
+      throw new IllegalArgumentException("unexpected value in first byte: 0x"
+          + Long.toHexString(header));
+    }
+
+    // Negative values store a complemented significand, hence the flipped flag.
+    if (negative) {
+      return new HNumeric(decodeSignificand(buff, exp, !dsc).negate());
+    }
+    return new HNumeric(decodeSignificand(buff, exp, dsc));
+  }
+
+  /**
+   * Encode a String value.
+   * <p>
+   * The value is written as the header byte 0x33, the bytes of the String
+   * in the UTF8 charset, and a 0x00 terminator. A null String is written
+   * with encodeNull.
+   * </p>
+   * @param buff The destination to which the encoded value is written.
+   * @param val The value to encode; may be null.
+   * @param ord The Order applied to the encoded bytes.
+   * @throws IllegalArgumentException when val contains the NUL character,
+   *           which would collide with the terminator.
+   */
+  public static void encodeString(ByteBuffer buff, String val, Order ord) {
+    if (null == val) {
+      encodeNull(buff, ord);
+      return;
+    }
+    if (val.indexOf('\u0000') >= 0) {
+      throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'");
+    }
+    final int start = buff.position();
+    buff.put((byte) 0x33).put(val.getBytes(UTF8)).put((byte) 0x00);
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Decode a String value. The backing array is not modified through use of
+   * this method; a descending-order value is un-complemented in a copy.
+   *
+   * @param buff The source, positioned at the header byte of the value.
+   * @return the decoded String, or null when the header is the null marker.
+   */
+  public static String decodeString(ByteBuffer buff) {
+    final byte header = buff.get();
+    if (header == 0x05 || header == (byte) 0xfa) {
+      return null;
+    }
+    assert header == 0x33 || header == (byte) 0xcc;
+    final boolean ascending = header == 0x33;
+    final Order ord = ascending ? Order.ASCENDING : Order.DESCENDING;
+    final byte terminator = (byte) (ascending ? 0x00 : 0xff);
+    final byte[] backing = buff.array();
+    final int begin = buff.position();
+    // Scan forward to the terminator, then step the cursor past it.
+    int end = begin;
+    while (backing[end] != terminator) {
+      end++;
+    }
+    buff.position(end + 1);
+    if (ascending) {
+      return new String(backing, begin, end - begin, UTF8);
+    }
+    final byte[] copy = Arrays.copyOfRange(backing, begin, end);
+    ord.apply(copy);
+    return new String(copy, UTF8);
+  }
+
+  /**
+   * Calculate the expected blob-mid encoded length based on unencoded length.
+   */
+  @VisibleForTesting
+  static int blobMidEncodedLength(int len) {
+    // 8 bits per input byte, 7 bits of input per encoded byte, rounded up.
+    final int payload = (int) Math.ceil((len * 8) / 7.0);
+    // + 1-byte header + 1-byte terminator
+    return payload + 1 + 1;
+  }
+
+  /**
+   * Calculate the expected blob-mid decoded length based on encoded length.
+   */
+  @VisibleForTesting
+  static int blobMidDecodedLength(int len) {
+    // Drop the 1-byte terminator and the 1-byte header.
+    final int payload = len - 1 - 1;
+    // 7 bits of payload per encoded byte, 8 bits per decoded byte.
+    return (payload * 7) / 8;
+  }
+
+  /**
+   * Encode a Blob value, intermediate element in Key.
+   * <p>
+   * The value is written as the header byte 0x35, the input repacked at 7
+   * bits per output byte with the high bit of every output byte set, and a
+   * 0x00 terminator. A null value is written with encodeNull.
+   * </p>
+   * @param buff The destination to which the encoded value is written.
+   * @param val The value to encode; may be null.
+   * @param ord The Order applied to the encoded bytes.
+   */
+  public static void encodeBlobMid(ByteBuffer buff, byte[] val, Order ord) {
+    if (null == val) {
+      encodeNull(buff, ord);
+      return;
+    }
+    // Blobs as intermediate entries are encoded as 7-bits per byte, null-terminated.
+    assert buff.remaining() >= blobMidEncodedLength(val.length) : "buffer overflow expected.";
+    int start = buff.position();
+    buff.put((byte) 0x35); /* Blob-mid */
+    // s is the right shift applied to the current input byte; t holds the low
+    // bits of the previous input byte that did not fit in its output byte.
+    byte s = 1, t = 0;
+    for (int i = 0; i < val.length; i++) {
+      // 0x80 is always set, so no payload byte can equal the 0x00 terminator.
+      buff.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s)));
+      if (s < 7) {
+        t = (byte) (val[i] << (7 - s));
+        s++;
+      } else {
+        // Seven bits are pending: flush them as their own output byte and
+        // restart the cycle.
+        buff.put((byte) (0x80 | val[i]));
+        s = 1;
+        t = 0;
+      }
+    }
+    // Flush any bits still carried over from the last input byte.
+    if (s > 1) buff.put((byte) (0x80 | t));
+    buff.put((byte) 0x00);
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Decode a blob value that was encoded using BlobMid encoding. The backing
+   * array is not modified through use of this method.
+   *
+   * @param buff The source, positioned at the header byte of the value.
+   * @return the decoded bytes, or null when the header is the null marker.
+   */
+  public static byte[] decodeBlobMid(ByteBuffer buff) {
+    byte header = buff.get();
+    if (header == 0x05 || header == (byte) 0xfa)
+      return null;
+    assert header == 0x35 || header == (byte) 0xca;
+    boolean isDsc = header != 0x35;
+    byte[] a = buff.array();
+    int start = buff.position(), end = start;
+    // The terminator is 0x00, or its complement 0xff in descending order.
+    byte terminator = (byte) (isDsc ? 0xff : 0x00);
+    while (a[end] != terminator) end++;
+    if (end - start == 0) {
+      // skip empty input buffer.
+      buff.get();
+      return new byte[0];
+    }
+    // create ret buffer using length of encoded data + 1 (header byte) + 1 (term byte)
+    ByteBuffer ret = ByteBuffer.allocate(blobMidDecodedLength(end - start + 2));
+    // t accumulates the high bits of the output byte being assembled; s is the
+    // right shift for the next encoded byte. Descending-order bytes are
+    // un-complemented with Order.mask as they are read.
+    int s = 6;
+    byte t = (byte) (((isDsc ? a[start] ^ Order.mask : a[start]) << 1) & 0xff);
+    for (int i = start + 1; i < end; i++) {
+      if (s == 7) {
+        // This encoded byte completes an output byte on its own; the extra i++
+        // moves on to the byte that starts the next group.
+        ret.put((byte) (t | ((isDsc ? a[i] ^ Order.mask : a[i]) & 0x7f)));
+        i++;
+      } else {
+        ret.put((byte) (t | (((isDsc ? a[i] ^ Order.mask : a[i]) & 0x7f) >>> s)));
+      }
+      t = (byte) (((isDsc ? a[i] ^ Order.mask : a[i]) << 8 - s) & 0xff);
+      s = s == 1 ? 7 : s - 1;
+    }
+    // Step the cursor past the terminator.
+    buff.position(++end);
+    assert t == 0 : "Unexpected bits remaining after decoding blob.";
+    return ret.array();
+  }
+
+  /**
+   * Encode a Blob value, last element in Key. The value is written as the
+   * header byte 0x36 followed by len bytes of val starting at offset, with
+   * no re-encoding; in DESCENDING order a terminator byte is appended.
+   *
+   * @param buff The destination to which the encoded value is written.
+   * @param val The source array; may be null.
+   * @param offset Index of the first byte of val to encode.
+   * @param len Number of bytes of val to encode.
+   * @param ord The Order applied to the encoded bytes.
+   * @throws IllegalArgumentException when the encoded range contains 0x00.
+   */
+  public static void encodeBlobLast(ByteBuffer buff, byte[] val, int offset, int len, Order ord) {
+    if (null == val) {
+      encodeNull(buff, ord);
+      if (ord == Order.DESCENDING) {
+        // DESCENDING ordered BlobLast requires a termination byte to preserve
+        // sort-order semantics of null values.
+        buff.put(ord.apply((byte) 0x00));
+      }
+      return;
+    }
+    // Blobs as final entry in a compound key are written unencoded.
+    // One byte of header, plus one byte of terminator when DESCENDING.
+    int overhead = ord == Order.ASCENDING ? 1 : 2;
+    assert buff.remaining() >= len + overhead;
+    for (int i = offset; i < offset + len; i++) {
+      if (val[i] == 0x00)
+        throw new IllegalArgumentException("0x00 bytes not permitted in value.");
+    }
+    int start = buff.position();
+    buff.put((byte) 0x36);
+    buff.put(val, offset, len);
+    // DESCENDING ordered BlobLast requires a termination byte to preserve
+    // sort-order semantics of null values.
+    if (ord == Order.DESCENDING) buff.put((byte) 0x00);
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Encode a Blob value, last element in Key. Encodes the whole of val; see
+   * the five-argument overload.
+   */
+  public static void encodeBlobLast(ByteBuffer buff, byte[] val, Order ord) {
+    if (null == val) {
+      encodeNull(buff, ord);
+      if (ord == Order.DESCENDING) {
+        // DESCENDING ordered BlobLast requires a termination byte to preserve
+        // sort-order semantics of null values.
+        buff.put(ord.apply((byte) 0x00));
+      }
+      return;
+    }
+    encodeBlobLast(buff, val, 0, val.length, ord);
+  }
+
+  /**
+   * Decode a Blob value, last element in Key. The backing array is not
+   * modified through use of this method. Everything between the header and
+   * the buffer's limit (less the terminator byte in DESCENDING order) is
+   * taken as the value.
+   *
+   * @param buff The source, positioned at the header byte of the value.
+   * @return the decoded bytes, or null when the header is the null marker.
+   */
+  public static byte[] decodeBlobLast(ByteBuffer buff) {
+    byte header = buff.get();
+    if (header == 0x05) {
+      return null;
+    } else if (header == (byte) 0xfa) {
+      buff.get(); // read DESCENDING order termination byte.
+      return null;
+    }
+
+    assert header == 0x36 || header == (byte) 0xc9;
+    Order ord = header == 0x36 ? Order.ASCENDING : Order.DESCENDING;
+    int length = buff.limit() - buff.position() - (ord == Order.ASCENDING ? 0 : 1);
+    byte[] ret = new byte[length];
+    buff.get(ret);
+    if (ord == Order.DESCENDING) buff.get(); // throw away the termination marker.
+    // Applied to the copy only, so the backing array is left untouched.
+    ord.apply(ret, 0, ret.length);
+    return ret;
+  }
+
+  /**
+   * Encode a null value as the single byte 0x05, passed through ord.
+   */
+  public static void encodeNull(ByteBuffer buff, Order ord) {
+    buff.put(ord.apply((byte) 0x05));
+  }
+
+  /**
+   * Encode an int32 value using the fixed-length encoding: the header byte
+   * 0x27 followed by the four bytes of val, most significant first.
+   */
+  public static void encodeInt32(ByteBuffer buff, int val, Order ord) {
+    int start = buff.position();
+    buff.put((byte) 0x27); /* int32 */
+    // Flipping the sign bit makes negative values sort before positive ones
+    // when the bytes are compared as unsigned.
+    buff.put((byte) ((val >> 24) ^ 0x80))
+        .put((byte) (val >> 16))
+        .put((byte) (val >> 8))
+        .put((byte) val);
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Decode an int32 value. The backing array is not modified
+   * through use of this method.
+   */
+  public static int decodeInt32(ByteBuffer buff) {
+    byte header = buff.get();
+    assert header == 0x27 || header == (byte) 0xd8;
+    Order ord = header == 0x27 ? Order.ASCENDING : Order.DESCENDING;
+    // Undo the sign-bit flip on the most significant byte.
+    int val = (ord.apply(buff.get()) ^ 0x80) & 0xff;
+    for (int i = 1; i < 4; i++) {
+      val = (val << 8) + (ord.apply(buff.get()) & 0xff);
+    }
+    return val;
+  }
+
+  /**
+   * Encode an int64 value using the fixed-length encoding: the header byte
+   * 0x28 followed by the eight bytes of val, most significant first.
+   */
+  public static void encodeInt64(ByteBuffer buff, long val, Order ord) {
+    int start = buff.position();
+    buff.put((byte) 0x28); /* int64 */
+    // Flipping the sign bit makes negative values sort before positive ones
+    // when the bytes are compared as unsigned.
+    buff.put((byte) ((val >> 56) ^ 0x80))
+        .put((byte) (val >> 48))
+        .put((byte) (val >> 40))
+        .put((byte) (val >> 32))
+        .put((byte) (val >> 24))
+        .put((byte) (val >> 16))
+        .put((byte) (val >> 8))
+        .put((byte) val);
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Decode an int64 value. The backing array is not modified
+   * through use of this method.
+   *
+   * @param buff The source, positioned at the header byte of the value.
+   * @return the decoded value.
+   */
+  public static long decodeInt64(ByteBuffer buff) {
+    final byte header = buff.get();
+    assert header == 0x28 || header == (byte) 0xd7;
+    final Order ord = (header == 0x28) ? Order.ASCENDING : Order.DESCENDING;
+    // The most significant byte was stored with its sign bit flipped.
+    long result = (ord.apply(buff.get()) ^ 0x80) & 0xff;
+    int remaining = 7;
+    while (remaining-- > 0) {
+      result = (result << 8) + (ord.apply(buff.get()) & 0xff);
+    }
+    return result;
+  }
+
+  /**
+   * Encode a 32-bit floating point value using the fixed-length encoding:
+   * the header byte 0x29 followed by four bytes, most significant first.
+   * The sign bit is always inverted, and the remaining bits are inverted as
+   * well when the value is negative.
+   *
+   * @see #decodeFloat32(ByteBuffer)
+   */
+  public static void encodeFloat32(ByteBuffer buff, float val, Order ord) {
+    final int start = buff.position();
+    final int raw = Float.floatToIntBits(val);
+    // (raw >> 31) is all ones for a negative value and zero otherwise;
+    // OR-ing in MIN_VALUE makes the sign bit flip in both cases.
+    final int sortable = raw ^ ((raw >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
+    buff.put((byte) 0x29); /* float32 */
+    for (int shift = 24; shift >= 0; shift -= 8) {
+      buff.put((byte) (sortable >> shift));
+    }
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Decode a 32-bit floating point value using the fixed-length encoding.
+   * The backing array is not written to.
+   *
+   * @see #encodeFloat32(ByteBuffer, float, Order)
+   */
+  public static float decodeFloat32(ByteBuffer buff) {
+    final byte header = buff.get();
+    assert header == 0x29 || header == (byte) 0xd6;
+    final Order ord = (header == 0x29) ? Order.ASCENDING : Order.DESCENDING;
+    int bits = 0;
+    for (int n = 0; n < 4; n++) {
+      bits = (bits << 8) + (ord.apply(buff.get()) & 0xff);
+    }
+    // Inverse of the encoder's transform: a stored sign bit of 0 means the
+    // original value was negative and all bits must be flipped back.
+    bits ^= (~bits >> (Integer.SIZE - 1)) | Integer.MIN_VALUE;
+    return Float.intBitsToFloat(bits);
+  }
+
+  /**
+   * Encode a 64-bit floating point value using the fixed-length encoding.
+   *

+ *

+ * This format ensures the following total ordering of floating point + * values: Double.NEGATIVE_INFINITY &lt; -Double.MAX_VALUE &lt; ... &lt; + * -Double.MIN_VALUE &lt; -0.0 &lt; +0.0 &lt; Double.MIN_VALUE &lt; ... + * &lt; Double.MAX_VALUE &lt; Double.POSITIVE_INFINITY &lt; Double.NaN

+ * Floating point numbers are encoded as specified in IEEE 754. A 64-bit + * double precision float consists of a sign bit, 11-bit unsigned exponent + * encoded in offset-1023 notation, and a 52-bit significand. The format is + * described further in the Double Precision + * Floating Point Wikipedia page

+ *

+ * The value of a normal float is (-1)<sup>sign bit</sup> &times; + * 2<sup>exponent - 1023</sup> &times; 1.significand

+ *

+ * The IEEE 754 floating point format already preserves sort ordering for + * positive floating point numbers when the raw bytes are compared in most + * significant byte order. This is discussed further at + * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm

+ *

+ * Thus, we need only ensure that negative numbers sort in the exact + * opposite order as positive numbers (so that say, negative infinity is + * less than negative 1), and that all negative numbers compare less than + * any positive number. To accomplish this, we invert the sign bit of all + * floating point numbers, and we also invert the exponent and significand + * bits if the floating point number was negative.

+ *

+ * More specifically, we first store the floating point bits into a 64-bit + * long l using {@link Double#doubleToLongBits}. This method + * collapses all NaNs into a single, canonical NaN value but otherwise + * leaves the bits unchanged. We then compute + *

+ * + *
+   * l ^= (l >> (Long.SIZE - 1)) | Long.MIN_VALUE
+   * 
+ *

+ * which inverts the sign bit and XOR's all other bits with the sign bit + * itself. Comparing the raw bytes of l in most significant + * byte order is equivalent to performing a double precision floating point + * comparison on the underlying bits (ignoring NaN comparisons, as NaNs + * don't compare equal to anything when performing floating point + * comparisons). + *

+ *

+ * The resulting long integer is then converted into a byte array by + * serializing the long one byte at a time in most significant byte order. + * The serialized integer is prefixed by a single header byte. All + * serialized values are 9 bytes in length. + *

+   *
+   * @param buff The destination to which the encoded value is written.
+   * @param val The value to encode.
+   * @param ord The Order applied to the encoded bytes.
+   * @see #decodeFloat64(ByteBuffer)
+   */
+  public static void encodeFloat64(ByteBuffer buff, double val, Order ord) {
+    final int start = buff.position();
+    final long raw = Double.doubleToLongBits(val);
+    // (raw >> 63) is all ones for a negative value and zero otherwise;
+    // OR-ing in MIN_VALUE makes the sign bit flip in both cases.
+    final long sortable = raw ^ ((raw >> (Long.SIZE - 1)) | Long.MIN_VALUE);
+    buff.put((byte) 0x30); /* float64 */
+    for (int shift = 56; shift >= 0; shift -= 8) {
+      buff.put((byte) (sortable >> shift));
+    }
+    ord.apply(buff.array(), start, buff.position() - start);
+  }
+
+  /**
+   * Decode a 64-bit floating point value using the fixed-length encoding.
+   * The backing array is not written to.
+   *
+   * @see #encodeFloat64(ByteBuffer, double, Order)
+   */
+  public static double decodeFloat64(ByteBuffer buff) {
+    final byte header = buff.get();
+    assert header == 0x30 || header == (byte) 0xCF;
+    final Order ord = (header == 0x30) ? Order.ASCENDING : Order.DESCENDING;
+    long bits = 0;
+    for (int n = 0; n < 8; n++) {
+      bits = (bits << 8) + (ord.apply(buff.get()) & 0xff);
+    }
+    // Inverse of the encoder's transform: a stored sign bit of 0 means the
+    // original value was negative and all bits must be flipped back.
+    bits ^= (~bits >> (Long.SIZE - 1)) | Long.MIN_VALUE;
+    return Double.longBitsToDouble(bits);
+  }
+
+  /**
+   * Skip buff's cursor forward one encoded value.
+   * <p>
+   * The header byte selects how much input follows. A header with its high
+   * bit set is taken as a descending-order encoding and is un-complemented
+   * before it is interpreted. Recognised headers are those written by the
+   * encoders in this class: null (0x05), the Numeric family (0x07-0x25),
+   * int32 (0x27), int64 (0x28), float32 (0x29), float64 (0x30), Text
+   * (0x33), Blob-mid (0x35) and Blob-last (0x36).
+   * </p>
+   * <p>
+   * NOTE(review): a DESCENDING Blob-last null is written as the null header
+   * followed by one terminator byte. That header cannot be told apart from
+   * a plain null here, so the terminator is left unread in that case.
+   * </p>
+   * @param buff The source, positioned at the header byte of a value.
+   * @throws IllegalArgumentException when the header byte is not recognised.
+   */
+  public static void skip(ByteBuffer buff) {
+    byte x = buff.get();
+    boolean dsc = (-1 == Integer.signum(x));
+    if (dsc) x = (byte) ((x ^ Order.mask) & 0xff);
+
+    switch (x) {
+      case 0x05: /* null */
+      case 0x07: /* -inf */
+      case 0x15: /* zero */
+      case 0x23: /* +inf */
+      case 0x25: /* NaN */
+        // header-only values
+        return;
+      case 0x08: /* Large negative number: 0x08, ~E, ~M */
+        skipVaruint64(buff, !dsc);
+        skipSignificand(buff, !dsc);
+        return;
+      case 0x14: /* Small negative number: 0x14, -E, ~M */
+        skipVaruint64(buff, dsc);
+        skipSignificand(buff, !dsc);
+        return;
+      case 0x16: /* Small positive number: 0x16, ~-E, M */
+        skipVaruint64(buff, !dsc);
+        skipSignificand(buff, dsc);
+        return;
+      case 0x22: /* Large positive number: 0x22, E, M */
+        skipVaruint64(buff, dsc);
+        skipSignificand(buff, dsc);
+        return;
+      // Medium negative number: 0x13-E, ~M for E in [0, 10]. The labels are
+      // hexadecimal, so the range 0x09..0x13 includes 0x0a..0x0f.
+      case 0x09:
+      case 0x0a:
+      case 0x0b:
+      case 0x0c:
+      case 0x0d:
+      case 0x0e:
+      case 0x0f:
+      case 0x10:
+      case 0x11:
+      case 0x12:
+      case 0x13:
+        skipSignificand(buff, !dsc);
+        return;
+      // Medium positive number: 0x17+E, M for E in [0, 10]. The labels are
+      // hexadecimal, so the range 0x17..0x21 includes 0x1a..0x1f.
+      case 0x17:
+      case 0x18:
+      case 0x19:
+      case 0x1a:
+      case 0x1b:
+      case 0x1c:
+      case 0x1d:
+      case 0x1e:
+      case 0x1f:
+      case 0x20:
+      case 0x21:
+        skipSignificand(buff, dsc);
+        return;
+      case 0x27: /* 32-bit integer */
+      case 0x29: /* 32-bit float, as written by encodeFloat32 */
+        buff.position(buff.position() + 4);
+        return;
+      case 0x28: /* 64-bit integer */
+      case 0x30: /* 64-bit float, as written by encodeFloat64 */
+        buff.position(buff.position() + 8);
+        return;
+      case 0x36: /* Blob-last */
+        if (!dsc) {
+          // ASCENDING Blob-last carries no terminator: the value runs to the
+          // buffer's limit, which is also how decodeBlobLast reads it.
+          buff.position(buff.limit());
+          return;
+        }
+        // DESCENDING Blob-last is terminated like Text and Blob-mid.
+        // fall through
+      case 0x33: /* Text */
+      case 0x35: /* Blob-mid */
+        // for null-terminated values, skip to the end.
+        do {
+          x = (byte) (dsc ? buff.get() ^ Order.mask : buff.get());
+        } while (x != 0);
+        return;
+      default:
+        throw new IllegalArgumentException("unexpected value in first byte: 0x"
+            + Long.toHexString(x));
+    }
+  }
+
+  /**
+   * Return the number of encoded entries remaining in buff. The
+   * state of buff is not modified through use of this method.
+   * Entries are counted by calling skip on a duplicate of buff until the
+   * duplicate's position reaches its limit.
+   *
+   * @param buff The source, positioned at the header byte of a value.
+   * @return the number of encoded entries between position and limit.
+   */
+  public static int length(ByteBuffer buff) {
+    // The duplicate has its own cursor, so buff's position is left alone.
+    final ByteBuffer cursor = buff.duplicate();
+    int entries = 0;
+    while (cursor.hasRemaining()) {
+      skip(cursor);
+      entries++;
+    }
+    return entries;
+  }
+}
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
new file mode 100644
index 0000000..467a09d
--- /dev/null
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
@@ -0,0 +1,748 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Collections; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestOrderedBytes { + + // integer constants for testing Numeric code paths + static final Long[] I_VALS = + { 0L, 1L, 10L, 99L, 100L, 1234L, 9999L, 10000L, 10001L, 12345L, 123450L, Long.MAX_VALUE }; + static final int[] I_LENGTHS = { 1, 2, 2, 2, 2, 3, 3, 2, 4, 4, 4, 11 }; + + // real constants for testing Numeric code paths + static final Double[] D_VALS = + { 0.0, 0.00123, 0.0123, 0.123, 1.0, 10.0, 12.345, 99.0, 99.01, 99.0001, 100.0, 100.01, + 100.1, 1234.0, 1234.5, 9999.0, 9999.000001, 9999.000009, 9999.00001, 9999.00009, + 9999.000099, 9999.0001, 9999.001, 9999.01, 9999.1, 10000.0, 10001.0, 12345.0, 123450.0, + Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN, Double.MAX_VALUE }; + static final int[] D_LENGTHS = + { 1, 4, 4, 4, 2, 2, 4, 2, 3, 4, 2, 4, + 4, 3, 4, 3, 6, 6, 6, 6, + 6, 5, 5, 4, 4, 2, 4, 4, 4, + 1, 1, 1, 11 }; + + // fill in other gaps in Numeric code paths + static final HNumeric[] N_VALS = + { null, new HNumeric(Long.MAX_VALUE), new HNumeric(Long.MIN_VALUE), + new HNumeric(Double.MAX_VALUE), new HNumeric(Double.MIN_VALUE), + new HNumeric(BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100))) }; + static final int[] N_LENGTHS = + { 1, 11, 11, 11, 4, 12 }; + + /* + * This is the smallest difference between two doubles in D_VALS + */ + static final double MIN_EPSILON = 0.000001; + + /** + * Expected lengths of equivalent values should match + */ + @Test + public void testVerifyTestIntegrity() { + for (int i = 0; i < I_VALS.length; i++) { + for (int d = 0; d < 
D_VALS.length; d++) { + if (Math.abs(I_VALS[i] - D_VALS[d]) < MIN_EPSILON) { + assertEquals( + "Test inconsistency detected: expected lengths for " + I_VALS[i] + " do not match.", + I_LENGTHS[i], D_LENGTHS[d]); + } + } + } + } + + /** + * Tests the variable uint64 encoding. + *

+ * Building sqlite4 with -DVARINT_TOOL provides this reference:
+ * $ ./varint_tool 240 2287 67823 16777215 4294967295 1099511627775 + * 281474976710655 72057594037927935 18446744073709551615
+ * 240 = f0
+ * 2287 = f8ff
+ * 67823 = f9ffff
+ * 16777215 = faffffff
+ * 4294967295 = fbffffffff
+ * 1099511627775 = fcffffffffff
+ * 281474976710655 = fdffffffffffff
+ * 72057594037927935 = feffffffffffffff
+ * 9223372036854775807 = ff7fffffffffffffff (Long.MAX_VALUE)
+ * 9223372036854775808 = ff8000000000000000 (Long.MIN_VALUE)
+ * 18446744073709551615 = ffffffffffffffffff
+ *

+ */ + @Test + public void testVaru64Boundaries() { + int len; + + long vals[] = + { 239L, 240L, 2286L, 2287L, 67822L, 67823L, 16777214L, 16777215L, 4294967294L, 4294967295L, + 1099511627774L, 1099511627775L, 281474976710654L, 281474976710655L, 72057594037927934L, + 72057594037927935L, Long.MAX_VALUE - 1, Long.MAX_VALUE, Long.MIN_VALUE + 1, + Long.MIN_VALUE, -2L, -1L }; + int lens[] = { 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9, 9, 9, 9 }; + assertEquals("Broken test!", vals.length, lens.length); + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. + */ + for (boolean comp : new boolean[] { true, false }) { + for (int i = 0; i < vals.length; i++) { + ByteBuffer buf = ByteBuffer.allocate(lens[i] + 1); + buf.get(); // skip first byte + len = OrderedBytes.putVaruint64(buf, vals[i], comp); + assertEquals("Surprising serialized length.", lens[i], len); + assertEquals(buf.limit(), buf.position()); + buf.flip(); + buf.get(); // skip first byte + assertEquals("Length inspection failed.", + lens[i], OrderedBytes.lengthVaru64(buf, comp)); + assertEquals("Deserialization failed.", vals[i], OrderedBytes.getVaruint64(buf, comp)); + assertEquals(buf.limit(), buf.position()); + } + } + } + + /** + * Test integer encoding. Example input values come from reference wiki + * page. + */ + protected void testNumericInt() { + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < I_VALS.length; i++) { + ByteBuffer buf1 = ByteBuffer.allocate(I_LENGTHS[i] + 1); + buf1.get(); + OrderedBytes.encodeNumeric(buf1, I_VALS[i], ord); + assertEquals( + "Encoded value does not match expected length.", + buf1.capacity(), buf1.position()); + buf1.flip(); + buf1.get(); + long decoded = OrderedBytes.decodeNumeric(buf1).longValue(); + assertEquals( + "Decoded value does not match expected value.", + I_VALS[i].longValue(), decoded); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[I_VALS.length][]; + for (int i = 0; i < I_VALS.length; i++) { + ByteBuffer buf = ByteBuffer.allocate(I_LENGTHS[i] + 1); + buf.get(); + OrderedBytes.encodeNumeric(buf, I_VALS[i], ord); + encoded[i] = buf.array(); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Long[] sortedVals = Arrays.copyOf(I_VALS, I_VALS.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + ByteBuffer buf = ByteBuffer.wrap(encoded[i]); + buf.get(); + long decoded = OrderedBytes.decodeNumeric(buf).longValue(); + assertEquals( + String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i].longValue(), decoded); + } + } + } + + /** + * Test real encoding. Example input values come from reference wiki page. + */ + protected void testNumericReal() { + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < D_VALS.length; i++) { + ByteBuffer buf1 = ByteBuffer.allocate(D_LENGTHS[i] + 1); + buf1.get(); + OrderedBytes.encodeNumeric(buf1, D_VALS[i], ord); + assertEquals(buf1.capacity(), buf1.position()); + buf1.flip(); + buf1.get(); + double decoded = OrderedBytes.decodeNumeric(buf1).doubleValue(); + assertEquals( + "Decoded value does not match expected value.", + D_VALS[i].doubleValue(), + decoded, MIN_EPSILON); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[D_VALS.length][]; + for (int i = 0; i < D_VALS.length; i++) { + ByteBuffer buf = ByteBuffer.allocate(D_LENGTHS[i] + 1); + buf.get(); + OrderedBytes.encodeNumeric(buf, D_VALS[i], ord); + encoded[i] = buf.array(); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Double[] sortedVals = Arrays.copyOf(D_VALS, D_VALS.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + ByteBuffer buf = ByteBuffer.wrap(encoded[i]); + buf.get(); + double decoded = OrderedBytes.decodeNumeric(buf).doubleValue(); + assertEquals( + String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i].doubleValue(), decoded, MIN_EPSILON); + } + } + } + + /** + * Fill gaps in Numeric encoding testing. + */ + protected void testNumericOther() { + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < N_VALS.length; i++) { + ByteBuffer buf1 = ByteBuffer.allocate(N_LENGTHS[i] + 1); + buf1.get(); + OrderedBytes.encodeNumeric(buf1, N_VALS[i], ord); + assertEquals(buf1.capacity(), buf1.position()); + buf1.flip(); + buf1.get(); + HNumeric decoded = OrderedBytes.decodeNumeric(buf1); + assertEquals("Decoded value does not match expected value.", N_VALS[i], decoded); + } + } + } + + /** + * Verify Real and Int encodings are compatible. + */ + protected void testNumericIntRealCompatibility() { + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < I_VALS.length; i++) { + // skip values for which BigDecimal instantiation drops precision + BigDecimal bdi = BigDecimal.valueOf(I_VALS[i]); + if (bdi.compareTo(BigDecimal.valueOf((double) I_VALS[i])) != 0) continue; + + // verify primitives + ByteBuffer bi = ByteBuffer.allocate(I_LENGTHS[i]); + ByteBuffer br = ByteBuffer.allocate(I_LENGTHS[i]); + OrderedBytes.encodeNumeric(bi, I_VALS[i], ord); + OrderedBytes.encodeNumeric(br, I_VALS[i], ord); + assertEquals(bi, br); + bi.flip(); + assertEquals((long) I_VALS[i], OrderedBytes.decodeNumeric(bi).longValue()); + br.flip(); + assertEquals((long) I_VALS[i], (long) OrderedBytes.decodeNumeric(br).doubleValue()); + + // verify BigDecimal for Real encoding + br = ByteBuffer.allocate(I_LENGTHS[i]); + OrderedBytes.encodeNumeric(br, bdi, ord); + assertEquals(bi, br); + bi.flip(); + assertEquals(0, + bdi.compareTo(BigDecimal.valueOf(OrderedBytes.decodeNumeric(bi).longValue()))); + } + } + } + + /** + * Test Numeric encoding. + */ + @Test + public void testNumeric() { + testNumericInt(); + testNumericReal(); + testNumericOther(); + testNumericIntRealCompatibility(); + } + + /** + * Test int32 encoding. 
+   */
+  @Test
+  public void testInt32() {
+    Integer[] vals =
+      { Integer.MIN_VALUE, Integer.MIN_VALUE / 2, 0, Integer.MAX_VALUE / 2, Integer.MAX_VALUE };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions: the buffer
+     * is sized for exactly the expected encoding plus the one skipped byte,
+     * so the position must land on the capacity after encoding.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(5 + 1);
+        buf1.get();
+        OrderedBytes.encodeInt32(buf1, vals[i], ord);
+        assertEquals(
+          "Encoded value does not match expected length.",
+          buf1.capacity(), buf1.position());
+        buf1.flip();
+        buf1.get();
+        int decoded = OrderedBytes.decodeInt32(buf1);
+        assertEquals("Decoded value does not match expected value.",
+          vals[i].intValue(), decoded);
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec. buf.array() hands
+     * back the whole backing array, so the leading offset byte is part of
+     * each sort key; the decode step below skips it again.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(5 + 1);
+        buf.get();
+        OrderedBytes.encodeInt32(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      Integer[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder());
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        int decoded = OrderedBytes.decodeInt32(buf);
+        assertEquals(
+          String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i].intValue(), decoded);
+      }
+    }
+  }
+
+  /**
+   * Test int64 encoding: values must round-trip in both orders, fill exactly
+   * 9 bytes, and sort as encoded bytes in the same order as the longs.
+   */
+  @Test
+  public void testInt64() {
+    Long[] vals = { Long.MIN_VALUE, Long.MIN_VALUE / 2, 0L, Long.MAX_VALUE / 2, Long.MAX_VALUE };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(9 + 1);
+        buf1.get();
+        OrderedBytes.encodeInt64(buf1, vals[i], ord);
+        assertEquals("Encoded value does not match expected length.", buf1.capacity(),
+          buf1.position());
+        buf1.flip();
+        buf1.get();
+        long decoded = OrderedBytes.decodeInt64(buf1);
+        assertEquals("Decoded value does not match expected value.", vals[i].longValue(), decoded);
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(9 + 1);
+        buf.get();
+        OrderedBytes.encodeInt64(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      Long[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder());
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        long decoded = OrderedBytes.decodeInt64(buf);
+        assertEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i].longValue(), decoded);
+      }
+    }
+  }
+
+  /**
+   * Test float32 encoding: values must round-trip bit-for-bit in both orders,
+   * fill exactly 5 bytes, and sort as encoded bytes in the same order as the
+   * floats.
+   */
+  @Test
+  public void testFloat32() {
+    /*
+     * Float.MIN_VALUE is the smallest positive nonzero float, not the most
+     * negative one, so negative values are listed explicitly to exercise
+     * sign handling.
+     */
+    Float[] vals =
+      { -Float.MAX_VALUE, -Float.MAX_VALUE / 2.0f, -Float.MIN_VALUE, 0.0f, Float.MIN_VALUE,
+        Float.MIN_VALUE + 1.0f, Float.MAX_VALUE / 2.0f, Float.MAX_VALUE };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions. values are
+     * compared through their raw bit patterns rather than with a delta.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(5 + 1);
+        buf1.get();
+        OrderedBytes.encodeFloat32(buf1, vals[i], ord);
+        assertEquals("Encoded value does not match expected length.", buf1.capacity(),
+          buf1.position());
+        buf1.flip();
+        buf1.get();
+        float decoded = OrderedBytes.decodeFloat32(buf1);
+        assertEquals("Decoded value does not match expected value.",
+          Float.floatToIntBits(vals[i].floatValue()),
+          Float.floatToIntBits(decoded));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(5 + 1);
+        buf.get();
+        OrderedBytes.encodeFloat32(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      Float[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder());
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        float decoded = OrderedBytes.decodeFloat32(buf);
+        assertEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          Float.floatToIntBits(sortedVals[i].floatValue()),
+          Float.floatToIntBits(decoded));
+      }
+    }
+  }
+
+  /**
+   * Test float64 encoding: values must round-trip bit-for-bit in both orders,
+   * fill exactly 9 bytes, and sort as encoded bytes in the same order as the
+   * doubles.
+   */
+  @Test
+  public void testFloat64() {
+    /*
+     * Double.MIN_VALUE is the smallest positive nonzero double, not the most
+     * negative one, so negative values are listed explicitly to exercise
+     * sign handling.
+     */
+    Double[] vals =
+      { -Double.MAX_VALUE, -Double.MAX_VALUE / 2.0, -Double.MIN_VALUE, 0.0, Double.MIN_VALUE,
+        Double.MIN_VALUE + 1.0, Double.MAX_VALUE / 2.0, Double.MAX_VALUE };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions. values are
+     * compared through their raw bit patterns rather than with a delta.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(9 + 1);
+        buf1.get();
+        OrderedBytes.encodeFloat64(buf1, vals[i], ord);
+        assertEquals("Encoded value does not match expected length.", buf1.capacity(),
+          buf1.position());
+        buf1.flip();
+        buf1.get();
+        double decoded = OrderedBytes.decodeFloat64(buf1);
+        assertEquals("Decoded value does not match expected value.",
+          Double.doubleToLongBits(vals[i].doubleValue()),
+          Double.doubleToLongBits(decoded));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(9 + 1);
+        buf.get();
+        OrderedBytes.encodeFloat64(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      Double[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder());
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        double decoded = OrderedBytes.decodeFloat64(buf);
+        assertEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          Double.doubleToLongBits(sortedVals[i].doubleValue()),
+          Double.doubleToLongBits(decoded));
+      }
+    }
+  }
+
+  /**
+   * Test string encoding: values must round-trip in both orders and sort as
+   * encoded bytes in the same order as the Strings.
+   */
+  @Test
+  public void testString() {
+    String[] vals = { "foo", "bar", "baz" };
+    // buffer size reserved for each encoded value, excluding the leading offset byte.
+    int[] expectedLengths = { 5, 5, 5 };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf1.get();
+        OrderedBytes.encodeString(buf1, vals[i], ord);
+        buf1.flip();
+        buf1.get();
+        assertEquals(
+          "Decoded value does not match expected value.",
+          vals[i], OrderedBytes.decodeString(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf.get();
+        OrderedBytes.encodeString(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      String[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder());
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        String decoded = OrderedBytes.decodeString(buf);
+        assertEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i], decoded);
+      }
+    }
+  }
+
+  /**
+   * Assert that encoding a String containing the NUL character is rejected
+   * with an {@link IllegalArgumentException}.
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testStringNoNullChars() {
+    ByteBuffer buff = ByteBuffer.allocate(3);
+    OrderedBytes.encodeString(buff, "\u0000", Order.ASCENDING);
+  }
+
+  /**
+   * Test length estimation algorithms for Blob-mid encoding. Each pair is
+   * checked in both directions: decoded length to encoded length and back.
+   */
+  @Test
+  public void testblobMidLencodedLength() {
+    int[][] values = {
+      /* decoded length, encoded length
+       * ceil((n bytes * 8 bits/input byte) / 7 bits/encoded byte) + 1 header + 1 termination
+       */
+      { 0, 2 }, { 1, 4 }, { 2, 5 }, { 3, 6 }, { 4, 7 },
+      { 5, 8 }, { 6, 9 }, { 7, 10 }, { 8, 12 }
+    };
+
+    for (int[] pair : values) {
+      assertEquals(pair[1], OrderedBytes.blobMidEncodedLength(pair[0]));
+      assertEquals(pair[0], OrderedBytes.blobMidDecodedLength(pair[1]));
+    }
+  }
+
+  /**
+   * Test Blob-mid encoding: values must round-trip in both orders and sort as
+   * encoded bytes in the same order as the raw byte[]s.
+   */
+  @Test
+  public void testBlobMid() {
+    // Bytes.toBytes is used instead of String.getBytes() so the fixtures do
+    // not depend on the platform default charset.
+    byte[][] vals =
+      { Bytes.toBytes(""), Bytes.toBytes("foo"), Bytes.toBytes("foobarbazbub"),
+        { (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa,
+          (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa },
+        Bytes.toBytes("1"), Bytes.toBytes("22"), Bytes.toBytes("333"), Bytes.toBytes("4444"),
+        Bytes.toBytes("55555"), Bytes.toBytes("666666"), Bytes.toBytes("7777777"),
+        Bytes.toBytes("88888888") };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (byte[] val : vals) {
+        ByteBuffer buf1 = ByteBuffer.allocate(OrderedBytes.blobMidEncodedLength(val.length) + 1);
+        buf1.get();
+        OrderedBytes.encodeBlobMid(buf1, val, ord);
+        buf1.flip();
+        buf1.get();
+        assertArrayEquals(
+          "Decoded value does not match expected value.",
+          val, OrderedBytes.decodeBlobMid(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(OrderedBytes.blobMidEncodedLength(vals[i].length) + 1);
+        buf.get();
+        OrderedBytes.encodeBlobMid(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      byte[][] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals, Bytes.BYTES_COMPARATOR);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder(Bytes.BYTES_COMPARATOR));
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        byte[] decoded = OrderedBytes.decodeBlobMid(buf);
+        // Arrays.toString renders the contents; a bare byte[] would print as [B@hash.
+        assertArrayEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            Arrays.toString(sortedVals[i]), Arrays.toString(decoded), ord),
+          sortedVals[i], decoded);
+      }
+    }
+  }
+
+  /**
+   * Test Blob-last encoding: values must round-trip in both orders, sort as
+   * encoded bytes in the same order as the raw byte[]s, and a sub-range of a
+   * source array must encode only that range.
+   */
+  @Test
+  public void testBlobLast() {
+    // Bytes.toBytes is used instead of String.getBytes() so the fixtures do
+    // not depend on the platform default charset.
+    byte[][] vals =
+      { Bytes.toBytes(""), Bytes.toBytes("foo"), Bytes.toBytes("foobarbazbub"),
+        { (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa,
+          (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa } };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions. the
+     * descending encoding is given one more byte than the ascending one.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (byte[] val : vals) {
+        ByteBuffer buf1 = ByteBuffer.allocate(val.length + (ord == Order.ASCENDING ? 2 : 3));
+        buf1.get();
+        OrderedBytes.encodeBlobLast(buf1, val, ord);
+        buf1.flip();
+        buf1.get();
+        assertArrayEquals(
+          "Decoded value does not match expected value.",
+          val, OrderedBytes.decodeBlobLast(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(vals[i].length + (ord == Order.ASCENDING ? 2 : 3));
+        buf.get();
+        OrderedBytes.encodeBlobLast(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      byte[][] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) {
+        Arrays.sort(sortedVals, Bytes.BYTES_COMPARATOR);
+      } else {
+        Arrays.sort(sortedVals, Collections.reverseOrder(Bytes.BYTES_COMPARATOR));
+      }
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        byte[] decoded = OrderedBytes.decodeBlobLast(buf);
+        // Arrays.toString renders the contents; a bare byte[] would print as [B@hash.
+        assertArrayEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            Arrays.toString(sortedVals[i]), Arrays.toString(decoded), ord),
+          sortedVals[i], decoded);
+      }
+    }
+
+    /*
+     * assert byte[] segments are serialized correctly: only the 3 bytes
+     * starting at offset 3 ("bar") of the source array are encoded.
+     */
+    ByteBuffer buf = ByteBuffer.allocate(3 + 2);
+    OrderedBytes.encodeBlobLast(buf, Bytes.toBytes("foobarbaz"), 3, 3, Order.ASCENDING);
+    buf.flip();
+    assertArrayEquals(Bytes.toBytes("bar"), OrderedBytes.decodeBlobLast(buf));
+  }
+
+  /**
+   * Assert invalid input byte[] are rejected by Blob-last: a value containing
+   * a 0x00 byte must raise {@link IllegalArgumentException}.
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testBlobLastNoZeroBytes() {
+    byte[] val = { 0x01, 0x02, 0x00, 0x03 };
+    ByteBuffer buf = ByteBuffer.allocate(val.length + 2);
+    OrderedBytes.encodeBlobLast(buf, val, Order.ASCENDING);
+    fail("test should never get here.");
+  }
+}
-- 
1.8.3.2