From e6c75ab4ed33056c91609af4de534b55e47ad46e Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Thu, 6 Jun 2013 14:05:21 -0700 Subject: [PATCH] HBASE-8201 OrderedBytes provides order-preserving serialization OrderedBytes provides a serialization format in which the resulting byte[] retains the same sort order as the natural types. Serialized formats can be inspected and decoded without forward knowledge of their content. Implementations are provided for Numeric values with up to 64 bits of precision, Strings, and byte[]. Utility methods for counting and skipping encoded entries are also provided. The encoding format is modeled after the SQLite4 key encoding format. See http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki for details. A notable deviation from the sqlite spec is in the blob-last format. Blob-last is modified to include a termination byte of 0x00. This is necessary in order to maintain reverse sort order of empty values. A notable difference between this specification and Java is the sort order of NaN. This spec treats NaN as less than all values, including null. Java sorts NaN as greater than any floating point value. See http://docs.oracle.com/javase/1.5.0/docs/api/java/util/Arrays.html#sort(double[]) remaining TODOs include: - an implementation for encoding and decoding Real values that doesn't suffer from rounding error. - consider using BigInteger/BigDecimal to allow for arbitrary size and precision numerics. 
--- .../org/apache/hadoop/hbase/util/OrderedBytes.java | 1069 ++++++++++++++++++++ .../apache/hadoop/hbase/util/TestOrderedBytes.java | 444 ++++++++ 2 files changed, 1513 insertions(+) create mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java create mode 100644 hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java new file mode 100644 index 0000000..cc29561 --- /dev/null +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java @@ -0,0 +1,1069 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Utility class that handles ordered byte arrays. 
That is, unlike + * {@link Bytes}, these methods produce byte arrays which maintain the sort + * order of the original values. + *

+ * Encoding specification is nicked from SQLite4's encoding scheme, hence the + * external links. + *

+ *

Summary

+ *

+ * Each value is encoded as one or more bytes. The first byte of the encoding, + * its meaning, and a terse description of the bytes that follow is given by + * the following table: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
Content Type | Encoding
NULL | 0x05
NaN | 0x06
negative infinity | 0x07
negative large | 0x08, ~E, ~M
negative medium | 0x13-E, ~M
negative small | 0x14, -E, ~M
zero | 0x15
positive small | 0x16, ~-E, M
positive medium | 0x17+E, M
positive large | 0x22, E, M
positive infinity | 0x23
text | 0x24, T
binary | 0x25, B
final binary | 0x26, X
+ *

+ *

Null Encoding

+ *

+ * Each value that is a NULL encodes as a single byte of 0x05. Since every + * other value encoding begins with a byte greater than 0x05, this forces NULL + * values to sort first. + *

+ *

Text Encoding

+ *

+ * Each text value begins with a single byte of 0x24 and ends with a single + * byte of 0x00. There are zero or more intervening bytes that encode the text + * value. The intervening bytes are chosen so that the encoding will sort in + * the desired collating order. The intervening bytes may not contain a 0x00 + * character; the only 0x00 byte allowed in a text encoding is the final byte. + *

+ *

+ * The text encoding ends in 0x00 in order to ensure that when there are two + * strings where one is a prefix of the other that the shorter string will + * sort first. + *

+ *

Binary Encoding

+ *

+ * The encoding of binary fields is different depending on whether or not + * the value to be encoded is the last value (the right-most value) in the + * key. + *

+ *

+ * Each value that is BINARY that is not the last value of the key begins with + * a single byte of 0x25 and ends with a single byte of 0x00. There are zero + * or more intervening bytes that encode the binary value. None of the + * intervening bytes may be zero. Each of the intervening bytes contains 7 + * bits of blob content with a 1 in the high-order bit (the 0x80 bit). The + * final byte before the 0x00 contains any left-over bits of the blob content. + *

+ *

+ * When the very last value of a key is BINARY, then it is encoded as a single + * byte of 0x26 followed by a byte-for-byte copy of the BINARY value and a + * terminating byte of 0x00. This alternative encoding is more efficient, but + * it only works if there are no subsequent values in the key: the copied + * bytes may themselves contain 0x00, so the end of the value is taken to be + * the end of the key rather than the first 0x00 byte. + *

+ *

Numeric Encoding

+ *

+ * Numeric values must be coded so as to sort in numeric order. We assume that + * numeric values can be both integer and floating point values. + *

+ *

+ * Simplest cases first: If the numeric value is a NaN, then the encoding is a + * single byte of 0x06. This causes NaN values to sort prior to every other + * numeric value. The only value that is less than a NaN is a NULL. + *

+ *

+ * If the numeric value is a negative infinity then the encoding is a single + * byte of 0x07. Since every other numeric value except NaN has a larger + * initial byte, this encoding ensures that negative infinity will sort prior + * to every other numeric value other than NaN. + *

+ *

+ * If the numeric value is a positive infinity then the encoding is a single + * byte of 0x23. Every other numeric value encoding begins with a smaller + * byte, ensuring that positive infinity always sorts last among numeric + * values. 0x23 is also smaller than 0x24, the initial byte of a text value, + * ensuring that every numeric value sorts before every text value. + *

+ *

+ * If the numeric value is exactly zero then it is encoded as a single byte of + * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14 + * and finite positive values will have initial bytes of 0x16 through 0x22. + *

+ *

+ * For all values, we compute a mantissa M and an exponent E. The mantissa is + * a base-100 representation of the value. The exponent E determines where to + * put the decimal point. + *

+ *

+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X>=0 and X<=99) then the byte value will be + * 2*X+1 for every byte of the mantissa, except for the last byte which will + * be 2*X+0. The mantissa must be the minimum number of bytes necessary to + * represent the value; trailing X==0 digits are omitted. This means that the + * mantissa will never contain a byte with the value 0x00. + *

+ *

+ * If we assume all digits of the mantissa occur to the right of the decimal + * point, then the exponent E is the power of one hundred by which one must + * multiply the mantissa to recover the original value. + *

+ *

+ * Values are classified as large, medium, or small according to the value of + * E. If E is 11 or more, the value is large. For E between 0 and 10, the + * value is medium. For E less than zero, the value is small. + *

+ *

+ * Large positive values are encoded as a single byte 0x22 followed by E as a + * varint and then M. Medium positive values are a single byte of 0x17+E + * followed by M. Small positive values are encoded as a single byte 0x16 + * followed by the ones-complement of the varint for -E followed by M. + *

+ *

+ * Small negative values are encoded as a single byte 0x14 followed by -E as a + * varint and then the ones-complement of M. Medium negative values are + * encoded as a byte 0x13-E followed by the ones-complement of M. Large + * negative values consist of the single byte 0x08 followed by the + * ones-complement of the varint encoding of E followed by the ones-complement + * of M. + *

+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class OrderedBytes { + + /** + * The order in which an OrderedBytes implementation will sort, according to + * the natural order of the underlying type. + */ + public enum Order { + ASCENDING ((byte)0x00), + DESCENDING ((byte)0xff); + + public final byte mask; + + /** + * Returns the adjusted trichotomous value according to the ordering + * imposed by this Order. + */ + public int cmp(int cmp) { + return cmp * (this == ASCENDING ? 1 : -1); + } + + /** + * Apply order to the byte b. + */ + public byte apply(byte b) { + return (byte) (this == ASCENDING ? b : b ^ this.mask); + } + + /** + * Apply order to the byte array a. + */ + public void apply(byte[] a) { + if (this != DESCENDING) return; + for (int i = 0; i < a.length; i++) { + a[i] ^= this.mask; + } + } + + /** + * Apply order to the byte array a according to the Order. + */ + public void apply(byte[] a, int offset, int length) { + if (this != DESCENDING) return; + for (int i = 0; i < length; i++) { + a[offset + i] ^= this.mask; + } + } + + @Override + public String toString() { return this == ASCENDING ? "asc" : "dsc"; } + + Order(byte mask) { this.mask = mask; } + } + + /** + * A Double Comparator that treats NaN as smallest. + */ + public static final Comparator REAL_CMP = new Comparator() { + @Override + public int compare(Double o1, Double o2) { + if (o1.isNaN()) return -1; + if (o2.isNaN()) return 1; + return o1.compareTo(o2); + } + }; + + /** + * Perform unsigned comparison between two long values. Conforms to the same + * interface as {@link Comparator#compare(Object, Object)}. + */ + private static int unsignedCmp(long x1, long x2) { + int cmp; + if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 
0 : 1))) == 0) return 0; + // invert the result when either value is negative + if ((x1 < 0) != (x2 < 0)) return -cmp; + return cmp; + } + + /** + * Write a 32-bit unsigned integer to dst as 4 big-endian + * bytes. + * @return number of bytes written. + */ + private static int putUint32(ByteBuffer dst, int val) { + dst.put((byte) (val >>> 24)) + .put((byte) (val >>> 16)) + .put((byte) (val >>> 8)) + .put((byte) val); + return 4; + } + + /** + * Encode an unsigned 64-bit integer val into dst. + * Compliment the encoded value when comp is true. + *

+ * This method is package-private for testing. + *

+ * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/varint.c, int + * sqlite4PutVarint64(unsigned char *z, sqlite4_uint64 x) + */ + static int putVaruint64(ByteBuffer dst, long val, boolean comp) { + int w, y, start = dst.position(); + byte[] a = dst.array(); + Order ord = comp ? Order.DESCENDING : Order.ASCENDING; + if (-1 == unsignedCmp(val, 241L)) { + dst.put((byte) val); + ord.apply(a, start, 1); + return 1; + } + if (-1 == unsignedCmp(val, 2288L)) { + y = (int) (val - 240); + dst.put((byte) (y / 256 + 241)) + .put((byte) (y % 256)); + ord.apply(a, start, 2); + return 2; + } + if (-1 == unsignedCmp(val, 67824L)) { + y = (int) (val - 2288); + dst.put((byte) 249) + .put((byte) (y / 256)) + .put((byte) (y % 256)); + ord.apply(a, start, 3); + return 3; + } + y = (int) (val & 0xffffffff); + w = (int) (val >>> 32); + if (w == 0) { + if (-1 == unsignedCmp(y, 16777216L)) { + dst.put((byte) 250) + .put((byte) (y >>> 16)) + .put((byte) (y >>> 8)) + .put((byte) y); + ord.apply(a, start, 4); + return 4; + } + dst.put((byte) 251); + putUint32(dst, y); + ord.apply(a, start, 5); + return 5; + } + if (-1 == unsignedCmp(w, 256L)) { + dst.put((byte) 252) + .put((byte) w); + putUint32(dst, y); + ord.apply(a, start, 6); + return 6; + } + if (-1 == unsignedCmp(w, 65536L)) { + dst.put((byte) 253) + .put((byte) (w >>> 8)) + .put((byte) w); + putUint32(dst, y); + ord.apply(a, start, 7); + return 7; + } + if (-1 == unsignedCmp(w, 16777216L)) { + dst.put((byte) 254) + .put((byte) (w >>> 16)) + .put((byte) (w >>> 8)) + .put((byte) w); + putUint32(dst, y); + ord.apply(a, start, 8); + return 8; + } + dst.put((byte) 255); + putUint32(dst, w); + putUint32(dst, y); + ord.apply(a, start, 9); + return 9; + } + + /** + * Inspect an encoded varu64 for it's encoded length. Does not modify + * src's state. + *

+ * This method is package-private for testing. + *

+ * @param src source buffer + * @param comp if true, parse the compliment of the value. + * @return number of bytes consumed by this value + * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki + */ + static int lengthVaru64(ByteBuffer src, boolean comp) { + byte[] a = src.array(); + int i = src.position(); + int a0 = (comp ? a[i] ^ 0xff : a[i]) & 0xff; + if (a0 <= 240) return 1; + if (a0 >= 241 && a0 <= 248) return 2; + if (a0 == 249) return 3; + if (a0 == 250) return 4; + if (a0 == 251) return 5; + if (a0 == 252) return 6; + if (a0 == 253) return 7; + if (a0 == 254) return 8; + if (a0 == 255) return 9; + throw new IllegalArgumentException("unexpected value in first byte: 0x" + + Long.toHexString(a[i])); + } + + /** + * Decode a sequence of bytes in src as an unsigned 64-bit + * integer. Compliment the encoded value when comp is true. + *

+ * This method is package-private for testing. + *

+ * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/varint.c, int + * sqlite4GetVarint64(const unsigned char *z, int n, sqlite4_uint64 + * *pResult) + */ + static long getVaruint64(ByteBuffer src, boolean comp) { + assert src.remaining() >= lengthVaru64(src, comp); + long ret; + byte x = src.get(); + int a0 = (comp ? x ^ 0xff : x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8; + if (-1 == unsignedCmp(a0, 241)) { + return a0; + } + x = src.get(); + a1 = (comp ? x ^ 0xff : x) & 0xff; + if (-1 == unsignedCmp(a0, 249)) { + return (a0 - 241) * 256 + a1 + 240; + } + x = src.get(); + a2 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 249) { + return 2288 + 256 * a1 + a2; + } + x = src.get(); + a3 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 250) { + return (a1 << 16) | (a2 << 8) | a3; + } + x = src.get(); + a4 = (comp ? x ^ 0xff : x) & 0xff; + // seed ret with unshifted a1 because sign-extension bites us when casting (long) (a1 << 24). + ret = a1; + ret = (ret << 24) | ((a2 & 0xff) << 16) | ((a3 & 0xff) << 8) | (a4 & 0xff); + if (a0 == 251) { + return ret; + } + x = src.get(); + a5 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 252) { + return (ret << 8) | a5; + } + x = src.get(); + a6 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 253) { + return (ret << 16) | (a5 << 8) | a6; + } + x = src.get(); + a7 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 254) { + return (ret << 24) | (a5 << 16) | (a6 << 8) | a7; + } + x = src.get(); + a8 = (comp ? x ^ 0xff : x) & 0xff; + return (ret << 32) | (0xffffffff & ((a5 << 24) | (a6 << 16) | (a7 << 8) | a8)); + } + + /** + * Helper for encoding the positive integer m using the key + * encoding. Appends the significand M to buff and + * returns the associated Exponent. Write the compliment of e + * to buff when ecomp is true. Write the + * compliment of M to buff when mcomp + * is true. 
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int + * encodeIntKey(sqlite4_uint64 m, KeyEncoder *p) + */ + private static int encodeIntKey(ByteBuffer buff, long m, boolean ecomp, boolean mcomp) { + assert m > 0; + int i = 0, e, startM; + Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING; + byte[] digits = new byte[20]; + do { + digits[i++] = (byte) ((m % 100) & 0xff); + m /= 100; + } while (m > 0); + e = i; + assert e >= 1 && e <= 10; + if (e > 10) putVaruint64(buff, e, ecomp); + startM = buff.position(); + while (i > 0) + buff.put((byte) ((digits[--i] * 2 + 1) & 0xff)); + buff.array()[buff.position() - 1] &= 0xfe; + ord.apply(buff.array(), startM, buff.position() - startM); + return e; + } + + /** + * Encode an integer value. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeInt(ByteBuffer buff, long v, Order ord) { + int e, i, start = buff.position(); + if (v == 0) { + buff.put((byte) 0x15); /* Numeric zero */ + } else if (v < 0) { + i = buff.position(); + buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */ + e = encodeIntKey(buff, -v, true, true); + if (e <= 10) buff.put(i, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */ + } else { + i = buff.position(); + buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */ + e = encodeIntKey(buff, v, false, false); + if (e <= 10) buff.put(i, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */ + } + ord.apply(buff.array(), start, buff.position() - start); + } + + /** + * Read significand digits from buff according to the magnitude + * of e. Uses a long for the accumulator. + *

+ * Note: + *

    + *
  • separate methods for Int and Real are necessary because double loses + * precision around -Long.MAX_VALUE.
  • + *
  • This method does not support deserializing the value + * Long.MIN_VALUE because of accumulator overflow.
  • + *
+ *

+ */ + private static long decodeSignificandInt(ByteBuffer buff, int e, boolean comp) { + byte[] a = buff.array(); + long m = 0; + long p = e - 1; + for (int i = buff.position();; i++) { + m += Math.pow(100.0, p) * (((comp ? a[i] ^ 0xff : a[i]) & 0xff) / 2); + p--; + if (((comp ? a[i] ^ 0xff : a[i]) & 1) == 0) { + buff.position(i + 1); + break; + } + } + return m; + } + + /** + * Read significand digits from buff according to the magnitude + * of e. Uses a double for the accumulator. Treat + * encoded bytes as compliments when comp is true. + *

+ * Note: + *

    + *
  • separate methods for Int and Real are necessary because double loses + * precision around -Long.MAX_VALUE.
  • + *
  • after roughly 10 significand bytes, the accumulator flips over to + * infinity, thus extremely large values such as + * Double.MAX_VALUE cannot be decoded.
  • + *
+ *

+ */ + private static double decodeSignificandReal(ByteBuffer buff, int e, boolean comp) { + byte[] a = buff.array(); + double m = 0; + double p = e - 1; + for (int i = buff.position();; i++) { + m += Math.pow(100.0, p) * (((comp ? a[i] ^ 0xff : a[i]) & 0xff) / 2); + p--; + if (((comp ? a[i] ^ 0xff : a[i]) & 1) == 0) { + buff.position(i + 1); + break; + } + } + return m; + } + + /** + * Decode an integer value. The backing array is not modified through use of + * this method. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int sqlite4VdbeDecodeIntKey(...) + */ + public static long decodeInt(ByteBuffer buff) { + byte x = buff.get(); + int e = 0; + boolean dsc = (-1 == Integer.signum(x)); + if (dsc) x = (byte) ((x ^ 0xff) & 0xff); + + if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */ + e = 0x13 - x; + return (long) -decodeSignificandInt(buff, e, true ^ dsc); + } else if (x == 0x15) { /* Numeric zero */ + return 0; + } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */ + e = x - 0x17; + return (long) decodeSignificandInt(buff, e, false ^ dsc); + } else { + throw new IllegalArgumentException("unexpected value in first byte: 0x" + Long.toHexString(x)); + } + } + + /** + * Compare two doubles for equality, within a margin of error. + */ + private static boolean doubleEquals(double r, double l) { + return r == l ? true : Math.abs(r - l) < 0.000001; + } + + /** + * Encode the small positive floating point number r using the key encoding. + * The caller guarantees that r will be less than 1.0 and greater than 0.0. + * Write the compliment of e to buff when + * ecomp is true. Write the compliment of M to + * buff when mcomp is true. 
+ * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static void + * encodeSmallFloatKey(double r, KeyEncoder *p) + */ + private static void encodeSmallFloatKey(ByteBuffer buff, double r, boolean ecomp, boolean mcomp) { + assert r > 0.0 && r < 1.0; + int e = 0, d, startM; + Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING; + while (r < 1e-10) { r *= 1e8; e += 4; } + while (r < 0.01) { r *= 100.0; e++; } + putVaruint64(buff, e, ecomp); + startM = buff.position(); + for (int i = 0; i < 18 && !doubleEquals(r, 0.0); i++) { + r *= 100.0; + d = (int) r; + buff.put((byte) ((2 * d + 1) & 0xff)); + r -= d; + } + buff.array()[buff.position() - 1] &= 0xfe; + ord.apply(buff.array(), startM, buff.position() - startM); + } + + /** + * Encode the large positive floating point number r using the key encoding. + * The caller guarantees that r will be finite and greater than or equal to + * 1.0.Write the compliment of e to buff when + * ecomp is true. Write the compliment of M to + * buff when mcomp is true. + * @return E(xponent) + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int + * encodeLargeFloatKey(double r, KeyEncoder *p) + */ + private static int encodeLargeFloatKey(ByteBuffer buff, double r, boolean ecomp, boolean mcomp) { + assert r >= 1.0; + int e = 0, d, startM; + Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING; + while (r >= 1e32 && e <= 350) { r *= 1e-32; e +=16; } + while (r >= 1e8 && e <= 350) { r *= 1e-8; e+= 4; } + while (r >= 1.0 && e <= 350) { r *= 0.01; e++; } + if (e > 10) putVaruint64(buff, e, ecomp); + startM = buff.position(); + for (int i = 0; i < 18 && !doubleEquals(r, 0.0); i++) { + r *= 100.0; + d = (int) r; + buff.put((byte) ((2 * d + 1) & 0xff)); + r = (r * 100.0 - d * 100.0) * 0.01; // r -= d avoid rounding error. 
+ } + buff.array()[buff.position() - 1] &= 0xfe; + ord.apply(buff.array(), startM, buff.position() - startM); + return e; + } + + /** + * Encode a Real value. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeReal(ByteBuffer buff, double r, Order ord) { + int e, i, start = buff.position(); + if (r == 0.0) { + buff.put((byte) 0x15); /* Numeric zero */ + } else if (Double.isNaN(r)) { + buff.put((byte) 0x06); /* NaN */ + } else if (Double.NEGATIVE_INFINITY == r) { + buff.put((byte) 0x07); + } else if (Double.POSITIVE_INFINITY == r) { + buff.put((byte) 0x23); + } else if (r <= -1.0) { + i = buff.position(); + buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */ + e = encodeLargeFloatKey(buff, -r, true, true); + if (e <= 10) buff.put(i, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */ + } else if (r < 0.0) { + buff.put((byte) 0x14); /* Small negative number: 0x14, -E, ~M */ + encodeSmallFloatKey(buff, -r, false, true); + } else if (r < 1.0) { + buff.put((byte) 0x16); /* Small positive number: 0x16, ~-E, M */ + encodeSmallFloatKey(buff, r, true, false); + } else { + i = buff.position(); + buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */ + e = encodeLargeFloatKey(buff, r, false, false); + if (e <= 10) buff.put(i, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */ + } + ord.apply(buff.array(), start, buff.position() - start); + } + + /** + * Decode a Real value. The backing array is not modified through use of + * this method. 
+ */ + public static double decodeReal(ByteBuffer buff) { + byte x = buff.get(); + int e = 0; + boolean dsc = (-1 == Integer.signum(x)); + if (dsc) x = (byte) ((x ^ 0xff) & 0xff); + + if (x == 0x06) { /* NaN */ + return Double.NaN; + } else if (x == 0x07) { /* -inf */ + return Double.NEGATIVE_INFINITY; + } else if (x == 0x08) { /* Large negative number: 0x08, ~E, ~M */ + e = (int) getVaruint64(buff, true ^ dsc); + return -decodeSignificandReal(buff, e, true ^ dsc); + } else if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */ + e = 0x13 - x; + return -decodeSignificandReal(buff, e, true ^ dsc); + } else if (x == 0x14) { /* Small negative number: 0x14, -E, ~M */ + e = (int) -getVaruint64(buff, false ^ dsc); + return -decodeSignificandReal(buff, e, true ^ dsc); + } else if (x == 0x15) { /* zero */ + return Double.valueOf(0); + } else if (x == 0x16) { /* Small positive number: 0x16, ~-E, M */ + e = (int) -getVaruint64(buff, true ^ dsc); + return decodeSignificandReal(buff, e, false ^ dsc); + } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */ + e = x - 0x17; + return decodeSignificandReal(buff, e, false ^ dsc); + } else if (x == 0x22) { /* Large positive number: 0x22, E, M */ + e = (int) getVaruint64(buff, false ^ dsc); + return decodeSignificandReal(buff, e, false ^ dsc); + } else if (x == 0x23) { /* +inf */ + return Double.POSITIVE_INFINITY; + } else { + throw new IllegalArgumentException("unexpected value in first byte: 0x" + Long.toHexString(x)); + } + } + + /** + * Encode a String value. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) 
+ */ + public static void encodeString(ByteBuffer buff, String s, Order ord) { + if (s.contains("\u0000")) + throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'"); + int start = buff.position(); + buff.put((byte) 0x24); + buff.put(s.getBytes(Charset.forName("UTF-8"))); + buff.put((byte) 0x00); + ord.apply(buff.array(), start, buff.position() - start); + } + + /** + * Decode a String value. The backing array is not modified through use of + * this method. + */ + public static String decodeString(ByteBuffer buff) { + byte header = buff.get(); + assert header == 0x24 || header == (byte) 0xdb; + Order ord = header == 0x24 ? Order.ASCENDING : Order.DESCENDING; + byte[] a = buff.array(); + int start = buff.position(), i = start; + byte term = (byte) (ord == Order.ASCENDING ? 0x00 : 0xff); + while (a[i] != term) i++; + buff.position(++i); + if (Order.DESCENDING == ord) { + byte[] copy = Arrays.copyOfRange(a, start, i - 1); + ord.apply(copy); + return new String(copy, Charset.forName("UTF-8")); + } else { + return new String(a, start, i - start - 1, Charset.forName("UTF-8")); + } + } + + /** + * Calculate the expected blob-mid encoded length based on unencoded length. + *

+ * This method is package-private for use in tests. + *

+ */ + static int blobMidEncodedLength(int len) { + return ((len * 8) + 6) / 7 + 2; + } + + /** + * Calculate the expected blob-mid decoded length based on encoded length. + *

+ * This method is package-private for use in tests. + *

+ */ + static int blobMidDecodedLength(int len) { + return ((len * 7) - 6) / 8; + } + + /** + * Encode a Blob value, intermediate element in Key. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int + * encodeOneKeyValue(...) + */ + public static void encodeBlobMid(ByteBuffer buff, byte[] b, Order ord) { + // Blobs as intermediate entries are encoded as 7-bits per byte, null-terminated. + assert buff.remaining() >= blobMidEncodedLength(b.length) : "buffer overflow expected."; + int start = buff.position(); + buff.put((byte) 0x25); /* Blob-mid */ + byte s = 1, t = 0; + for (int i = 0; i < b.length; i++) { + buff.put((byte) (0x80 | t | ((b[i] & 0xff) >>> s))); + if (s < 7) { + t = (byte) (b[i] << (7 - s)); + s++; + } else { + buff.put((byte) (0x80 | b[i])); + s = 1; + t = 0; + } + } + if (s > 1) buff.put((byte) (0x80 | t)); + buff.put((byte) 0x00); + ord.apply(buff.array(), start, buff.position() - start); + } + + /** + * Decode a blob value that was encoded using BlobMid encoding. The backing + * array is not modified through use of this method. + */ + public static byte[] decodeBlobMid(ByteBuffer buff) { + byte header = buff.get(); + assert header == 0x25 || header == (byte) 0xda; + boolean isDsc = header != 0x25; + byte[] a = buff.array(); + int start = buff.position(), i = start; + byte term = (byte) (isDsc ? 0xff : 0x00); + while (a[i] != term) i++; + if (i - start == 0) { + // skip empty input buffer. + buff.get(); + return new byte[0]; + } + ByteBuffer ret = ByteBuffer.allocate(blobMidDecodedLength(i - start + 1)); + int s = 6; + byte t = (byte) (((isDsc ? a[start] ^ 0xff : a[start]) << 1) & 0xff); + for (i = start + 1; a[i] != term; i++) { + if (s == 7) { + ret.put((byte) (t | ((isDsc ? a[i] ^ 0xff : a[i]) & 0x7f))); + i++; + } else { + ret.put((byte) (t | (((isDsc ? a[i] ^ 0xff : a[i]) & 0x7f) >>> s))); + } + t = (byte) (((isDsc ? 
a[i] ^ 0xff : a[i]) << 8 - s) & 0xff); + s = s == 1 ? 7 : s - 1; + } + buff.position(++i); + assert t == 0 : "Unexpected bits remaining after decoding blob."; + return ret.array(); + } + + /** + * Encode a Blob value, last element in Key. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeBlobLast(ByteBuffer buff, byte[] b, Order ord) { + // Blobs as final entry in a compound key are written unencoded. + assert buff.remaining() >= b.length + 1; + int start = buff.position(); + buff.put((byte) 0x26); + buff.put(b); + buff.put((byte) 0x00); + ord.apply(buff.array(), start, buff.position() - start); + } + + /** + * Decode a Blob value, last element in Key. The backing array is not + * modified through use of this method. + */ + public static byte[] decodeBlobLast(ByteBuffer buff) { + byte header = buff.get(); + assert header == 0x26 || header == (byte) 0xd9; + Order ord = header == 0x26 ? Order.ASCENDING : Order.DESCENDING; + int length = buff.limit() - buff.position() - 1; + byte[] ret = new byte[length]; + buff.get(ret); + buff.get(); // throw away the termination marker. + ord.apply(ret, 0, ret.length); + return ret; + } + + /** + * Encode a null value. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeNull(ByteBuffer buff, Order ord) { + buff.put(ord.apply((byte) 0x05)); + } + + /** + * Encode a single value into a buff. + */ + public static void encode(ByteBuffer buff, Object val) { + encode(buff, val, Order.ASCENDING, true); + } + + /** + * Encode a single value into a buff. + */ + public static void encode(ByteBuffer buff, Object val, Order ord) { + encode(buff, val, ord, true); + } + + /** + * Encode a single value into buff. 
+ * TODO: refactor this so that users can register new type encoders. + * @param buff the destination. + * @param val the object to encode. + * @param ord the Order to apply. + * @param isLast indicate that this value is final in a sequence of values. + */ + private static void encode(ByteBuffer buff, Object val, Order ord, boolean isLast) { + if (null == val) { + encodeNull(buff, ord); + return; + } + Class c = val.getClass(); + if (Boolean.class.isAssignableFrom(c) || Character.class.isAssignableFrom(c) + || Byte.class.isAssignableFrom(c) || Short.class.isAssignableFrom(c) + || Integer.class.isAssignableFrom(c) || Long.class.isAssignableFrom(c)) { + encodeInt(buff, (Long) val, ord); + return; + } + if (Float.class.isAssignableFrom(c) || Double.class.isAssignableFrom(c)) { + encodeReal(buff, (Double) val, ord); + return; + } + if (String.class.isAssignableFrom(c)) { + encodeString(buff, (String) val, ord); + return; + } + if (byte[].class.isAssignableFrom(c)) { + if (isLast) encodeBlobLast(buff, (byte[]) val, ord); + else encodeBlobMid(buff, (byte[]) val, ord); + return; + } + + throw new IllegalArgumentException( + "No registered handler for Object of type " + val.getClass().getSimpleName()); + } + + /** + * Encode a sequence of values into a compound key. + */ + public static void encode(ByteBuffer buff, Object[] vals) { + Order[] orders = new Order[vals.length]; + Arrays.fill(orders, Order.ASCENDING); + encode(buff, vals, orders); + } + + /** + * Encode a sequence of values into a compound key. + */ + public static void encode(ByteBuffer buff, Object[] vals, Order ord) { + Order[] orders = new Order[vals.length]; + Arrays.fill(orders, ord); + encode(buff, vals, orders); + } + + /** + * Encode a sequence of values into a compound key. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, int + * sqlite4VdbeEncodeKey(...) 
+   * @param buff the destination.
+   * @param vals the values to encode, in key order.
+   * @param orders the Order to apply to the value at the same index.
+   * @throws IllegalArgumentException if vals and orders differ in length.
+   */
+  public static void encode(ByteBuffer buff, Object[] vals, Order[] orders) {
+    if (vals.length != orders.length) {
+      throw new IllegalArgumentException("vals and orders array lengths do not match.");
+    }
+
+    for (int i = 0; i < vals.length; i++) {
+      // Only the final value is flagged as last.
+      encode(buff, vals[i], orders[i], i == vals.length - 1);
+    }
+  }
+
+  /**
+   * Decode compound key entries. The backing array is not modified through
+   * use of this method.
+   *
+   * @param buff the source; every entry up to its limit is decoded.
+   * @return the decoded entries in key order. Every numeric entry is decoded
+   *         with decodeReal.
+   * @throws IllegalArgumentException if an entry starts with a header byte
+   *         that does not belong to any known encoding.
+   */
+  public static Object[] decode(ByteBuffer buff) {
+    ArrayList<Object> ret = new ArrayList<Object>();
+    while (buff.hasRemaining()) {
+      // Absolute get: peeks at the header without moving the cursor and,
+      // unlike indexing array() directly, honours the buffer's arrayOffset.
+      byte header = buff.get(buff.position());
+      if (header == 0x05 || header == (byte) 0xfa) { /* NULL */
+        ret.add(null);
+        buff.get();
+      } else if (header == 0x24 || header == (byte) 0xdb) { /* Text */
+        ret.add(decodeString(buff));
+      } else if (header == 0x25 || header == (byte) 0xda) { /* Blob-mid */
+        ret.add(decodeBlobMid(buff));
+      } else if (header == 0x26 || header == (byte) 0xd9) { /* Blob-last */
+        ret.add(decodeBlobLast(buff));
+      } else if ((header >= 0x06 && header <= 0x23) ||
+          (header <= (byte) 0xf9 && header >= (byte) 0xDC)) { /* numerics */
+        ret.add(decodeReal(buff));
+      } else {
+        // Fail unconditionally: nothing was consumed, so carrying on (as a
+        // disabled assert would) could never leave this loop.
+        throw new IllegalArgumentException(
+          "Unrecognized header byte 0x" + Integer.toHexString(header & 0xff));
+      }
+    }
+    return ret.toArray();
+  }
+
+  /**
+   * Skip buff's cursor forward one encoded value.
+   * TODO: don't compute significands, just skip over them.
+   * @param buff the source, positioned at the header byte of a value.
+   * @throws IllegalArgumentException if the header byte does not belong to
+   *         any known encoding.
+   */
+  public static void skip(ByteBuffer buff) {
+    byte x = buff.get();
+    int e;
+    // Ascending headers all lie in 0x05..0x26; a descending header is the
+    // bitwise inverse and so is negative when read as a signed byte.
+    boolean dsc = x < 0;
+    if (dsc) {
+      x = (byte) (x ^ 0xff);
+    }
+
+    switch (x) {
+      case 0x05: /* null */
+      case 0x06: /* NaN */
+      case 0x07: /* -inf */
+      case 0x15: /* zero */
+      case 0x23: /* +inf */
+        return;
+      case 0x08: /* Large negative number: 0x08, ~E, ~M */
+        e = (int) getVaruint64(buff, !dsc);
+        decodeSignificandReal(buff, e, !dsc);
+        return;
+      case 0x14: /* Small negative number: 0x14, -E, ~M */
+        e = (int) -getVaruint64(buff, dsc);
+        decodeSignificandReal(buff, e, !dsc);
+        return;
+      case 0x16: /* Small positive number: 0x16, ~-E, M */
+        e = (int) -getVaruint64(buff, !dsc);
+        decodeSignificandReal(buff, e, dsc);
+        return;
+      case 0x22: /* Large positive number: 0x22, E, M */
+        e = (int) getVaruint64(buff, dsc);
+        decodeSignificandReal(buff, e, dsc);
+        return;
+      /*
+       * Medium numbers carry the exponent in the header itself, so every
+       * header in the contiguous hexadecimal range must be listed, including
+       * 0x0a-0x0f and 0x1a-0x1f.
+       */
+      case 0x09: /* Medium negative number: 0x13-E, ~M */
+      case 0x0a:
+      case 0x0b:
+      case 0x0c:
+      case 0x0d:
+      case 0x0e:
+      case 0x0f:
+      case 0x10:
+      case 0x11:
+      case 0x12:
+      case 0x13:
+        e = 0x13 - x;
+        decodeSignificandReal(buff, e, !dsc);
+        return;
+      case 0x17: /* Medium positive number: 0x17+E, M */
+      case 0x18:
+      case 0x19:
+      case 0x1a:
+      case 0x1b:
+      case 0x1c:
+      case 0x1d:
+      case 0x1e:
+      case 0x1f:
+      case 0x20:
+      case 0x21:
+        e = x - 0x17;
+        decodeSignificandReal(buff, e, dsc);
+        return;
+      case 0x24: /* Text */
+      case 0x25: /* Blob-mid */
+        // for null-terminated values, skip to the end.
+        do {
+          x = (byte) (dsc ? buff.get() ^ 0xff : buff.get());
+        } while (x != 0);
+        return;
+      case 0x26: /* Blob-last */
+        // The payload is unencoded and may itself contain 0x00, so the first
+        // zero byte is not necessarily the terminator. Like decodeBlobLast,
+        // treat the value as running to the buffer's limit.
+        buff.position(buff.limit());
+        return;
+      default:
+        throw new IllegalArgumentException("unexpected value in first byte: 0x" + Long.toHexString(x));
+    }
+  }
+
+  /**
+   * Return the number of encoded entries remaining in buff. The
+   * state of buff is not modified through use of this method.
+   *
+   * @param buff the source; counting starts at its current position.
+   * @return the number of entries between buff's position and its limit.
+   */
+  public static int length(ByteBuffer buff) {
+    // Walk a duplicate so the caller's position and limit are left alone.
+    ByteBuffer cursor = buff.duplicate();
+    int entries = 0;
+    while (cursor.position() != cursor.limit()) {
+      skip(cursor);
+      entries++;
+    }
+    return entries;
+  }
+}
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
new file mode 100644
index 0000000..a9e65e4
--- /dev/null
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
@@ -0,0 +1,444 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.util.OrderedBytes.Order;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(SmallTests.class)
+public class TestOrderedBytes {
+
+  /**
+   * Tests the variable uint64 encoding.
+   *

+ * Building sqlite4 with -DVARINT_TOOL provides this reference:
+ * $ ./varint_tool 240 2287 67823 16777215 4294967295 1099511627775 + * 281474976710655 72057594037927935 18446744073709551615
+ * 240 = f0
+ * 2287 = f8ff
+ * 67823 = f9ffff
+ * 16777215 = faffffff
+ * 4294967295 = fbffffffff
+ * 1099511627775 = fcffffffffff
+ * 281474976710655 = fdffffffffffff
+ * 72057594037927935 = feffffffffffffff
+ * 9223372036854775807 = ff7fffffffffffffff (Long.MAX_VALUE)
+ * 18446744073709551615 = ffffffffffffffffff
+ *

+   * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki
+   */
+  @Test
+  public void testVaru64Boundaries() {
+    int len;
+
+    // Boundary inputs paired index-for-index with byte_lens. -1L is the
+    // all-ones 64-bit pattern, i.e. 18446744073709551615 read as unsigned.
+    long boundries[] =
+      { 240L, 2287L, 67823L, 16777215L, 4294967295L, 1099511627775L, 281474976710655L,
+        72057594037927935L, -1L };
+    // Expected serialized length, in bytes, of the value at the same index.
+    int byte_lens[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+    assertEquals("Broken test!", boundries.length, byte_lens.length);
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    // comp is handed unchanged to the put, length and get calls; both
+    // settings are exercised.
+    for (boolean comp : new boolean[] { true, false }) {
+      for (int i = 0; i < boundries.length; i++) {
+        // One byte more than the expected length; the extra byte is the
+        // skipped leading byte.
+        ByteBuffer buf = ByteBuffer.allocate(byte_lens[i] + 1);
+        buf.get(); // skip first byte
+        len = OrderedBytes.putVaruint64(buf, boundries[i], comp);
+        assertEquals("Surprising serialized length.", byte_lens[i], len);
+        // The write must have filled the buffer exactly.
+        assertEquals(buf.limit(), buf.position());
+        buf.flip();
+        buf.get(); // skip first byte
+        assertEquals("Length inspection failed.", i + 1, OrderedBytes.lengthVaru64(buf, comp));
+        assertEquals("Deserialization failed.", boundries[i], OrderedBytes.getVaruint64(buf, comp));
+        // The read must have consumed the buffer exactly.
+        assertEquals(buf.limit(), buf.position());
+      }
+    }
+  }
+
+  /**
+   * Test integer encoding. Example input values come from reference wiki
+   * page.
+   * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+   */
+  @Test
+  public void testInt() {
+    Long[] vals =
+      { 0L, 1L, 10L, 99L, 100L, 1234L, 9999L, 10000L, 10001L, 12345L, 123450L, Long.MAX_VALUE };
+    // Used to size the buffer for the value at the same index in vals.
+    int[] expectedLengths = { 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 11 };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(expectedLengths[i] + 1);
+        // Leave the first byte unused so the value is written at offset 1.
+        buf1.get();
+        OrderedBytes.encodeInt(buf1, vals[i], ord);
+        // Rewind for reading and step over the unused first byte again.
+        buf1.flip();
+        buf1.get();
+        assertEquals(
+          "Decoded value does not match expected value.",
+          vals[i].longValue(), OrderedBytes.decodeInt(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf.get();
+        OrderedBytes.encodeInt(buf, vals[i], ord);
+        // The whole backing array is kept, including the unused first byte.
+        encoded[i] = buf.array();
+      }
+
+      // Sort the encodings as raw bytes and the inputs by value (reversed
+      // for DESCENDING); the two orderings must then line up index by index.
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      Long[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) Arrays.sort(sortedVals);
+      else Arrays.sort(sortedVals, Collections.reverseOrder());
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        long decoded = OrderedBytes.decodeInt(buf);
+        assertEquals(
+          String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i].longValue(), decoded);
+      }
+    }
+  }
+
+  /**
+   * Test real encoding. Example input values come from reference wiki page.
+   * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+   */
+  @Test
+  public void testReal() {
+    // NOTE(review): several inputs (and their lengths below) are commented
+    // out; presumably they fail because of the rounding error the commit
+    // message lists as an open TODO -- confirm before re-enabling them.
+    Double[] vals =
+      { 0.0, 0.00123, 0.0123, 0.123, 1.0, 10.0, 12.345, 99.0, 99.01, 99.0001, 100.0, 100.01,
+        /*100.1,*/ 1234.0, 1234.5, 9999.0, /*9999.000001, 9999.000009, 9999.00001, 9999.00009,*/
+        /*9999.000099, 9999.0001,*/ 9999.001, 9999.01, 9999.1, 10000.0, 10001.0, 12345.0, 123450.0,
+        /*(double) Long.MAX_VALUE,*/ Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN };
+
+    // Expected encoded length, in bytes, of the value at the same index in
+    // vals; the commented-out entries mirror the commented-out values.
+    int[] expectedLengths =
+      { 1, 4, 4, 4, 2, 2, 4, 2, 3, 4, 2, 4,
+        /*4,*/ 3, 4, 3, /*6, 6, 6, 6,*/
+        /*6, 6,*/ 5, 4, 4, 2, 4, 4, 4,
+        /*11,*/ 1, 1, 1 };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf1.get();
+        OrderedBytes.encodeReal(buf1, vals[i], ord);
+        // The encoding must have filled the buffer exactly, which pins the
+        // encoded length to expectedLengths[i].
+        assertEquals(buf1.limit(), buf1.position());
+        buf1.flip();
+        buf1.get();
+        // Compared with a tolerance rather than for exact equality.
+        assertEquals(
+          "Decoded value does not match expected value.",
+          vals[i].doubleValue(), OrderedBytes.decodeReal(buf1), 0.000001);
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf.get();
+        OrderedBytes.encodeReal(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      // The inputs are ordered with OrderedBytes.REAL_CMP rather than
+      // Double's natural ordering.
+      Double[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) Arrays.sort(sortedVals, OrderedBytes.REAL_CMP);
+      else Arrays.sort(sortedVals, Collections.reverseOrder(OrderedBytes.REAL_CMP));
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        double decoded = OrderedBytes.decodeReal(buf);
+        assertEquals(
+          String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i].doubleValue(), decoded, 0.0000001);
+      }
+    }
+  }
+
+  /**
+   * Test string encoding.
+   */
+  @Test
+  public void testString() {
+    String[] vals = { "foo", "bar", "baz" };
+    // Used to size the buffer for the value at the same index in vals.
+    int expectedLengths[] = { 5, 5, 5 };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf1 = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf1.get();
+        OrderedBytes.encodeString(buf1, vals[i], ord);
+        buf1.flip();
+        buf1.get();
+        assertEquals(
+          "Decoded value does not match expected value.",
+          vals[i], OrderedBytes.decodeString(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(expectedLengths[i] + 1);
+        buf.get();
+        OrderedBytes.encodeString(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      // Byte-wise order of the encodings must match String's natural order
+      // (reversed for DESCENDING).
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      String[] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) Arrays.sort(sortedVals);
+      else Arrays.sort(sortedVals, Collections.reverseOrder());
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        String decoded = OrderedBytes.decodeString(buf);
+        assertEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i], decoded);
+      }
+    }
+  }
+
+  /**
+   * Encoding a string that contains the NUL character is expected to be
+   * rejected with an IllegalArgumentException.
+   */
+  @Test(expected = IllegalArgumentException.class)
+  public void testStringNoNullChars() {
+    ByteBuffer buff = ByteBuffer.allocate(3);
+    OrderedBytes.encodeString(buff, "\u0000", Order.ASCENDING);
+  }
+
+  /**
+   * Test Blob-mid encoding.
+   */
+  @Test
+  public void testBlobMid() {
+    // An empty blob, two text blobs, and twelve bytes of 0xaa (every other
+    // bit set).
+    byte[][] vals =
+      { "".getBytes(), "foo".getBytes(), "foobarbazbub".getBytes(),
+        { (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa,
+          (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa } };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (byte[] val : vals) {
+        // Sized from blobMidEncodedLength, plus the unused first byte.
+        ByteBuffer buf1 = ByteBuffer.allocate(OrderedBytes.blobMidEncodedLength(val.length) + 1);
+        buf1.get();
+        OrderedBytes.encodeBlobMid(buf1, val, ord);
+        buf1.flip();
+        buf1.get();
+        assertArrayEquals(
+          "Decoded value does not match expected value.",
+          val, OrderedBytes.decodeBlobMid(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(OrderedBytes.blobMidEncodedLength(vals[i].length) + 1);
+        buf.get();
+        OrderedBytes.encodeBlobMid(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      // Encodings and raw inputs are both ordered with BYTES_COMPARATOR
+      // (inputs reversed for DESCENDING) and must then line up index by index.
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      byte[][] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) Arrays.sort(sortedVals, Bytes.BYTES_COMPARATOR);
+      else Arrays.sort(sortedVals, Collections.reverseOrder(Bytes.BYTES_COMPARATOR));
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        byte[] decoded = OrderedBytes.decodeBlobMid(buf);
+        assertArrayEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i], decoded);
+      }
+    }
+  }
+
+  /**
+   * Test Blob-last encoding.
+   */
+  @Test
+  public void testBlobLast() {
+    // An empty blob, two text blobs, and twelve bytes of 0xaa (every other
+    // bit set).
+    byte[][] vals =
+      { "".getBytes(), "foo".getBytes(), "foobarbazbub".getBytes(),
+        { (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa,
+          (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa } };
+
+    /*
+     * assert encoded values match decoded values. encode into target buffer
+     * starting at an offset to detect over/underflow conditions.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      for (byte[] val : vals) {
+        // val.length + 3: the unused first byte, then a header byte, the
+        // blob itself and a termination byte.
+        ByteBuffer buf1 = ByteBuffer.allocate(val.length + 3);
+        buf1.get();
+        OrderedBytes.encodeBlobLast(buf1, val, ord);
+        buf1.flip();
+        buf1.get();
+        assertArrayEquals(
+          "Decoded value does not match expected value.",
+          val, OrderedBytes.decodeBlobLast(buf1));
+      }
+    }
+
+    /*
+     * assert natural sort order is preserved by the codec.
+     */
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      byte[][] encoded = new byte[vals.length][];
+      for (int i = 0; i < vals.length; i++) {
+        ByteBuffer buf = ByteBuffer.allocate(vals[i].length + 3);
+        buf.get();
+        OrderedBytes.encodeBlobLast(buf, vals[i], ord);
+        encoded[i] = buf.array();
+      }
+
+      Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);
+      byte[][] sortedVals = Arrays.copyOf(vals, vals.length);
+      if (ord == Order.ASCENDING) Arrays.sort(sortedVals, Bytes.BYTES_COMPARATOR);
+      else Arrays.sort(sortedVals, Collections.reverseOrder(Bytes.BYTES_COMPARATOR));
+
+      for (int i = 0; i < sortedVals.length; i++) {
+        ByteBuffer buf = ByteBuffer.wrap(encoded[i]);
+        buf.get();
+        byte[] decoded = OrderedBytes.decodeBlobLast(buf);
+        assertArrayEquals(String.format(
+            "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
+            sortedVals[i], decoded, ord),
+          sortedVals[i], decoded);
+      }
+    }
+  }
+
+  /**
+   * Test creation and consumption of compound rowkeys.
+   */
+  @Test
+  public void testCompound() {
+
+    for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) {
+      // One value of each kind: reals, text, null, a blob that is not the
+      // final entry, -infinity and NaN.
+      Object[] vals =
+        new Object[] { -9999.001, "foo", null, 0.00123, "bar".getBytes(),
+          Double.NEGATIVE_INFINITY, Double.NaN };
+      ByteBuffer buff = ByteBuffer.allocate(25);
+
+      /*
+       * use a buffer 2 bytes longer than necessary, assert first and last
+       * bytes are not modified by encoding.
+       */
+      // Sentinel bytes: 0xff for ASCENDING, 0x00 for DESCENDING. The first
+      // put is relative (advances the cursor); the second is absolute.
+      buff.put((byte) (ord == Order.ASCENDING ? 0xff : 0x00));
+      buff.put(buff.limit() - 1, (byte) (ord == Order.ASCENDING ?
0xff : 0x00));
+      OrderedBytes.encode(buff, vals, ord);
+      // Both sentinel bytes must have survived the encode call.
+      assertEquals((byte) (ord == Order.ASCENDING ? 0xff : 0x00), buff.array()[0]);
+      assertEquals((byte) (ord == Order.ASCENDING ? 0xff : 0x00), buff.array()[buff.limit() - 1]);
+      // Snapshot of the backing array, used below to prove that the read-only
+      // operations leave it untouched.
+      byte[] encoded = Arrays.copyOf(buff.array(), buff.array().length);
+
+      /*
+       * assert a sequence of values are decoded correctly.
+       */
+      // flip() sets the limit to the end of the encoded data; get() steps
+      // over the leading sentinel byte.
+      buff.flip();
+      buff.get();
+      Object[] outputs = OrderedBytes.decode(buff);
+      assertEquals((Double) vals[0], ((Double) outputs[0]).doubleValue(), 0.000001);
+      assertEquals(vals[1], outputs[1]);
+      assertEquals(vals[2], outputs[2]);
+      assertEquals(((Double) vals[3]), ((Double) outputs[3]).doubleValue(), 0.000001);
+      assertArrayEquals((byte[]) vals[4], (byte[]) outputs[4]);
+      assertEquals(vals[5], outputs[5]);
+      assertEquals(vals[6], outputs[6]);
+      assertArrayEquals("decoding modified source array.", encoded, buff.array());
+
+      /*
+       * assert length calculator works.
+       */
+      buff.flip();
+      buff.get();
+      assertEquals(vals.length, OrderedBytes.length(buff));
+      assertArrayEquals("length operation modified source array.", encoded, buff.array());
+
+      /*
+       * assert entry skipping works.
+       */
+      // After each skip the cursor must rest on the header byte of the next
+      // entry; the trailing comment names that entry.
+      OrderedBytes.skip(buff);
+      assertEquals(ord.apply((byte) 0x24), buff.array()[buff.position()]); // "foo"
+      OrderedBytes.skip(buff);
+      assertEquals(ord.apply((byte) 0x05), buff.array()[buff.position()]); // null
+      OrderedBytes.skip(buff);
+      assertEquals(ord.apply((byte) 0x16), buff.array()[buff.position()]); // 0.00123
+      OrderedBytes.skip(buff);
+      assertEquals(ord.apply((byte) 0x25), buff.array()[buff.position()]); // "bar".getBytes()
+      OrderedBytes.skip(buff);
+      assertEquals(ord.apply((byte) 0x07), buff.array()[buff.position()]); // -inf
+      OrderedBytes.skip(buff);
+      assertEquals(ord.apply((byte) 0x06), buff.array()[buff.position()]); // NaN
+      assertArrayEquals("skip operation modified source array.", encoded, buff.array());
+    }
+  }
+}
-- 
1.8.1