From ef040e8094d339f8ebb1f3d0db796f63a2c464f9 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Mon, 3 Jun 2013 12:06:41 -0700 Subject: [PATCH] HBASE-8201 OrderedBytes provides order-preserving serialization WIP Commit. OrderedBytes provides a serialization format in which the resulting byte[] retains the same sort order as the natural types. Serialized formats can be inspected and decoded without forward knowledge of their content. Implementations are provided for Numeric values with up to 64-bits of precision, Strings, and byte[]. The encoding format is modeled after the SQLite4 key encoding format. See http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki for details. TODOs include: - descending sort order - encoding and decoding sequences of values ("compound rowkey") - bounds/error checking with String encoding - support for "mid" blob encoding and decoding, allowing byte[] to be used in the middle of compound keys. - utilities for counting, skipping encoded values on a ByteBuffer - consider using BigInteger/BigDecimal to allow for arbitrary size and precision numerics. --- .../org/apache/hadoop/hbase/util/OrderedBytes.java | 614 +++++++++++++++++++++ .../apache/hadoop/hbase/util/TestOrderedBytes.java | 153 +++++ 2 files changed, 767 insertions(+) create mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java create mode 100644 hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java new file mode 100644 index 0000000..69d4675 --- /dev/null +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java @@ -0,0 +1,614 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Utility class that handles ordered byte arrays. That is, unlike + * {@link Bytes}, these methods produce byte arrays which maintain the sort + * order of the original values. + *

+ * The encoding specification is borrowed from SQLite4's key encoding scheme, hence the + * external links. + *

+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedBytes {
+
+  /**
+   * The order in which an OrderedBytes implementation will sort, according to
+   * the natural order of the underlying type. Each constant carries a
+   * bit-mask: {@code 0x00} for ASCENDING and {@code 0xff} for DESCENDING.
+   */
+  public enum Order {
+    ASCENDING ((byte)0x00),
+    DESCENDING ((byte)0xff);
+
+    private final byte mask;
+
+    /**
+     * The bit-mask used to invert a value according to this order.
+     */
+    public byte mask() { return mask; }
+
+    /**
+     * Returns the adjusted trichotomous value according to the ordering
+     * imposed by this Order: {@code cmp} is returned unchanged for
+     * ASCENDING and negated for DESCENDING.
+     */
+    public int cmp(int cmp) {
+      return cmp * (this == ASCENDING ? 1 : -1);
+    }
+
+    /**
+     * Apply this Order to the raw byte array {@code a}, in place. For
+     * DESCENDING, each of the {@code length} bytes starting at index
+     * {@code offset} is XOR-ed with the mask; for ASCENDING the array is
+     * left untouched.
+     */
+    public void apply(byte[] a, int offset, int length) {
+      if (this != DESCENDING) return;
+      for (int i = offset; i - offset < length; i++) {
+        a[i] ^= this.mask;
+      }
+    }
+
+    @Override
+    public String toString() { return this == ASCENDING ? "asc" : "dsc"; }
+
+    Order(byte mask) { this.mask = mask; }
+  }
+
+  /**
+   * Perform unsigned comparison between two long values. Conforms to the same
+   * interface as {@link Comparator#compare(Object, Object)}.
+   * @return -1, 0 or 1.
+   */
+  private static int unsignedCmp(long x1, long x2) {
+    int cmp;
+    // start from the signed comparison; equal values need no adjustment
+    if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0;
+    // signs differ: the negative operand is the larger one when read as unsigned
+    if ((x1 < 0) != (x2 < 0)) return -cmp;
+    return cmp;
+  }
+
+  /**
+   * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian
+   * bytes.
+   * @return number of bytes written.
+   */
+  private static int putUint32(ByteBuffer dst, int val) {
+    dst.put((byte) (val >>> 24))
+       .put((byte) (val >>> 16))
+       .put((byte) (val >>> 8))
+       .put((byte) val);
+    return 4;
+  }
+
+  /**
+   * Encode an unsigned 64-bit integer {@code val} into {@code dst}.
+   * Complement the encoded value when {@code comp} is true.
+   *

+ * This method is package-private for testing. + *

+   * @return the number of bytes written, between 1 and 9.
+   * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki
+   * @see http://www.sqlite.org/src4/finfo?name=src/varint.c, int
+   *      sqlite4PutVarint64(unsigned char *z, sqlite4_uint64 x)
+   */
+  static int putVaruint64(ByteBuffer dst, long val, boolean comp) {
+    int w, y, start = dst.position();
+    // The backing array is used to complement the freshly written bytes in place.
+    // NOTE(review): position() is used directly as the array index, so this
+    // presumably expects an array-backed buffer with arrayOffset() == 0 -- confirm.
+    byte[] a = dst.array();
+    // 0..240: the value is its own single byte
+    if (-1 == unsignedCmp(val, 241L)) {
+      dst.put((byte) val);
+      if (comp) Order.DESCENDING.apply(a, start, 1);
+      return 1;
+    }
+    // 241..2287: first byte 241..248 followed by one byte
+    if (-1 == unsignedCmp(val, 2288L)) {
+      y = (int) (val - 240);
+      dst.put((byte) (y / 256 + 241))
+         .put((byte) (y % 256));
+      if (comp) Order.DESCENDING.apply(a, start, 2);
+      return 2;
+    }
+    // 2288..67823: first byte 249 followed by two bytes
+    if (-1 == unsignedCmp(val, 67824L)) {
+      y = (int) (val - 2288);
+      dst.put((byte) 249)
+         .put((byte) (y / 256))
+         .put((byte) (y % 256));
+      if (comp) Order.DESCENDING.apply(a, start, 3);
+      return 3;
+    }
+    // Split into low (y) and high (w) 32-bit halves. The int literal 0xffffffff
+    // widens to -1L, so the mask is a no-op; the (int) cast does the truncation.
+    y = (int) (val & 0xffffffff);
+    w = (int) (val >>> 32);
+    if (w == 0) {
+      // a negative y sign-extends to a huge unsigned long and so falls through to
+      // the 5-byte form
+      if (-1 == unsignedCmp(y, 16777216L)) {
+        dst.put((byte) 250)
+           .put((byte) (y >>> 16))
+           .put((byte) (y >>> 8))
+           .put((byte) y);
+        if (comp) Order.DESCENDING.apply(a, start, 4);
+        return 4;
+      }
+      dst.put((byte) 251);
+      putUint32(dst, y);
+      if (comp) Order.DESCENDING.apply(a, start, 5);
+      return 5;
+    }
+    // first byte 252: one byte of w, then the four bytes of y
+    if (-1 == unsignedCmp(w, 256L)) {
+      dst.put((byte) 252)
+         .put((byte) w);
+      putUint32(dst, y);
+      if (comp) Order.DESCENDING.apply(a, start, 6);
+      return 6;
+    }
+    // first byte 253: two bytes of w, then the four bytes of y
+    if (-1 == unsignedCmp(w, 65536L)) {
+      dst.put((byte) 253)
+         .put((byte) (w >>> 8))
+         .put((byte) w);
+      putUint32(dst, y);
+      if (comp) Order.DESCENDING.apply(a, start, 7);
+      return 7;
+    }
+    // first byte 254: three bytes of w, then the four bytes of y
+    if (-1 == unsignedCmp(w, 16777216L)) {
+      dst.put((byte) 254)
+         .put((byte) (w >>> 16))
+         .put((byte) (w >>> 8))
+         .put((byte) w);
+      putUint32(dst, y);
+      if (comp) Order.DESCENDING.apply(a, start, 8);
+      return 8;
+    }
+    // first byte 255: all eight bytes, big-endian
+    dst.put((byte) 255);
+    putUint32(dst, w);
+    putUint32(dst, y);
+    if (comp) Order.DESCENDING.apply(a, start, 9);
+    return 9;
+  }
+
+  /**
+   * Inspect an encoded varu64 for its encoded length.
Does not modify + * src's state. + *

+ * This method is package-private for testing. + *

+ * @param src source buffer + * @param comp if true, parse the compliment of the value. + * @return number of bytes consumed by this value + * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki + */ + static int lengthVaru64(ByteBuffer src, boolean comp) { + byte[] a = src.array(); + int i = src.position(); + int a0 = (comp ? a[i] ^ 0xff : a[i]) & 0xff; + if (a0 <= 240) return 1; + if (a0 >= 241 && a0 <= 248) return 2; + if (a0 == 249) return 3; + if (a0 == 250) return 4; + if (a0 == 251) return 5; + if (a0 == 252) return 6; + if (a0 == 253) return 7; + if (a0 == 254) return 8; + if (a0 == 255) return 9; + throw new IllegalArgumentException("unexpected value in first byte: 0x" + + Long.toHexString(a[i])); + } + + /** + * Decode a sequence of bytes in src as an unsigned 64-bit + * integer. Compliment the encoded value when comp is true. + *

+ * This method is package-private for testing. + *

+ * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/varint.c, int + * sqlite4GetVarint64(const unsigned char *z, int n, sqlite4_uint64 + * *pResult) + */ + static long getVaruint64(ByteBuffer src, boolean comp) { + assert src.remaining() >= lengthVaru64(src, comp); + long ret; + byte x = src.get(); + int a0 = (comp ? x ^ 0xff : x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8; + if (-1 == unsignedCmp(a0, 241)) { + return a0; + } + x = src.get(); + a1 = (comp ? x ^ 0xff : x) & 0xff; + if (-1 == unsignedCmp(a0, 249)) { + return (a0 - 241) * 256 + a1 + 240; + } + x = src.get(); + a2 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 249) { + return 2288 + 256 * a1 + a2; + } + x = src.get(); + a3 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 250) { + return (a1 << 16) | (a2 << 8) | a3; + } + x = src.get(); + a4 = (comp ? x ^ 0xff : x) & 0xff; + // seed ret with unshifted a1 because sign-extension bites us when casting (long) (a1 << 24). + ret = a1; + ret = (ret << 24) | ((a2 & 0xff) << 16) | ((a3 & 0xff) << 8) | (a4 & 0xff); + if (a0 == 251) { + return ret; + } + x = src.get(); + a5 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 252) { + return (ret << 8) | a5; + } + x = src.get(); + a6 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 253) { + return (ret << 16) | (a5 << 8) | a6; + } + x = src.get(); + a7 = (comp ? x ^ 0xff : x) & 0xff; + if (a0 == 254) { + return (ret << 24) | (a5 << 16) | (a6 << 8) | a7; + } + x = src.get(); + a8 = (comp ? x ^ 0xff : x) & 0xff; + return (ret << 32) | (0xffffffff & ((a5 << 24) | (a6 << 16) | (a7 << 8) | a8)); + } + + /** + * Helper for encoding the positive integer m using the key + * encoding. Appends the significand M to buff and + * returns the associated Exponent. Write the compliment of e + * to buff when ecomp is true. Write the + * compliment of M to buff when mcomp + * is true. 
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int + * encodeIntKey(sqlite4_uint64 m, KeyEncoder *p) + */ + private static int encodeIntKey(ByteBuffer buff, long m, boolean ecomp, boolean mcomp) { + assert m > 0; + int i = 0, e, startM; + byte[] digits = new byte[20]; + do { + digits[i++] = (byte) ((m % 100) & 0xff); + m /= 100; + } while (m > 0); + e = i; + assert e >= 1 && e <= 10; + if (e > 10) putVaruint64(buff, e, ecomp); + startM = buff.position(); + while (i > 0) + buff.put((byte) ((digits[--i] * 2 + 1) & 0xff)); + buff.array()[buff.position() - 1] &= 0xfe; + if (mcomp) Order.DESCENDING.apply(buff.array(), startM, buff.position() - startM); + return e; + } + + /** + * Encode an integer value. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeInt(ByteBuffer buff, long v, Order ord) { + int e, i, start = buff.position(); + if (v == 0) { + buff.put((byte) 0x15); /* Numeric zero */ + } else if (v < 0) { + i = buff.position(); + buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */ + e = encodeIntKey(buff, -v, true, true); + if (e <= 10) buff.put(i, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */ + } else { + i = buff.position(); + buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */ + e = encodeIntKey(buff, v, false, false); + if (e <= 10) buff.put(i, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */ + } + ord.apply(buff.array(), start, buff.position() +1); + } + + /** + * Read significand digits from buff according to the magnitude + * of e. Uses a long for the accumulator. + *

+ * Note: + *

+ *

+ */ + private static long decodeSignificandInt(ByteBuffer buff, int e, boolean comp) { + byte[] a = buff.array(); + long m = 0; + long p = e - 1; + for (int i = buff.position();; i++) { + m += Math.pow(100.0, p) * (((comp ? a[i] ^ 0xff : a[i]) & 0xff) / 2); + p--; + if (((comp ? a[i] ^ 0xff : a[i]) & 1) == 0) { + buff.position(i + 1); + break; + } + } + return m; + } + + /** + * Read significand digits from buff according to the magnitude + * of e. Uses a double for the accumulator. Treat + * encoded bytes as compliments when comp is true. + *

+ * Note: + *

    + *
  • separate methods for Int and Real are necessary because double loses + * precision around -Long.MAX_VALUE.
  • + *
  • after roughly 10 significand bytes, the accumulator flips over to + * infinity, thus extremely large values such as + * Double.MAX_VALUE cannot be decoded.
  • + *
+ *

+ */ + private static double decodeSignificandReal(ByteBuffer buff, int e, boolean comp) { + byte[] a = buff.array(); + double m = 0; + double p = e - 1; + for (int i = buff.position();; i++) { + m += Math.pow(100.0, p) * (((comp ? a[i] ^ 0xff : a[i]) & 0xff) / 2); + p--; + if (((comp ? a[i] ^ 0xff : a[i]) & 1) == 0) { + buff.position(i + 1); + break; + } + } + return m; + } + + /** + * Decode an integer value. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int sqlite4VdbeDecodeIntKey(...) + */ + public static long decodeInt(ByteBuffer buff) { + byte x = buff.get(); + int e = 0; + if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */ + e = 0x13 - x; + return (long) -decodeSignificandInt(buff, e, true); + } else if (x == 0x15 || x == 0xea) { /* Numeric zero */ + return 0; + } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */ + e = x - 0x17; + return (long) decodeSignificandInt(buff, e, false); + } else { + throw new IllegalArgumentException("unexpected value in first byte: 0x" + Long.toHexString(x)); + } + } + + /** + * Compare two doubles for equality, within a margin of error. + */ + private static boolean doubleEquals(double r, double l) { + return r == l ? true : Math.abs(r - l) < 0.00001; + } + + /** + * Encode the small positive floating point number r using the key encoding. + * The caller guarantees that r will be less than 1.0 and greater than 0.0. + * Write the compliment of e to buff when + * ecomp is true. Write the compliment of M to + * buff when mcomp is true. 
+   *
+   * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+   * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static void
+   *      encodeSmallFloatKey(double r, KeyEncoder *p)
+   */
+  private static void encodeSmallFloatKey(ByteBuffer buff, double r, boolean ecomp, boolean mcomp) {
+    assert r > 0.0 && r < 1.0;
+    int e = 0, d, startM;
+    // scale r up into [0.01, 1.0), counting the base-100 shifts in e
+    while (r < 1e-10) { r *= 1e8; e += 4; }
+    while (r < 0.01) { r *= 100.0; e++; }
+    putVaruint64(buff, e, ecomp);
+    startM = buff.position();
+    // emit at most 18 base-100 digits; stop once the remainder is within
+    // doubleEquals' tolerance of zero
+    for (int i = 0; i < 18 && !doubleEquals(r, 0.0); i++) {
+      r *= 100.0;
+      d = (int) r;
+      buff.put((byte) ((2 * d + 1) & 0xff));
+      r -= d;
+    }
+    // clear the low bit of the final byte to mark the end of M
+    buff.array()[buff.position() - 1] &= 0xfe;
+    if (mcomp) Order.DESCENDING.apply(buff.array(), startM, buff.position() - startM);
+  }
+
+  /**
+   * Encode the large positive floating point number r using the key encoding.
+   * The caller guarantees that r will be finite and greater than or equal to
+   * 1.0. Write the complement of {@code e} to {@code buff} when
+   * {@code ecomp} is true. Write the complement of M to
+   * {@code buff} when {@code mcomp} is true.
+   * @return E(xponent)
+   *
+   * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+   * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int
+   *      encodeLargeFloatKey(double r, KeyEncoder *p)
+   */
+  private static int encodeLargeFloatKey(ByteBuffer buff, double r, boolean ecomp, boolean mcomp) {
+    assert r >= 1.0;
+    int e = 0, d, startM;
+    // scale r down below 1.0, counting the base-100 shifts in e
+    while (r >= 1e32 && e <= 350) { r *= 1e-32; e +=16; }
+    while (r >= 1e8 && e <= 350) { r *= 1e-8; e+= 4; }
+    while (r >= 1.0 && e <= 350) { r *= 0.01; e++; }
+    // the exponent is written explicitly only when it is greater than 10
+    if (e > 10) putVaruint64(buff, e, ecomp);
+    startM = buff.position();
+    // emit at most 18 base-100 digits; stop once the remainder is within
+    // doubleEquals' tolerance of zero
+    for (int i = 0; i < 18 && !doubleEquals(r, 0.0); i++) {
+      r *= 100.0;
+      d = (int) r;
+      buff.put((byte) ((2 * d + 1) & 0xff));
+      r -= d;
+    }
+    // clear the low bit of the final byte to mark the end of M
+    buff.array()[buff.position() - 1] &= 0xfe;
+    if (mcomp) Order.DESCENDING.apply(buff.array(), startM, buff.position() - startM);
+    return e;
+  }
+
+  /**
+   * Encode a Real value.
+ * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeReal(ByteBuffer buff, double r, Order ord) { + int e, i, start = buff.position(); + if (r == 0.0) { + buff.put((byte) 0x15); /* Numeric zero */ + } else if (Double.isNaN(r)) { + buff.put((byte) 0x06); /* NaN */ + } else if (Double.NEGATIVE_INFINITY == r) { + buff.put((byte) 0x07); + } else if (Double.POSITIVE_INFINITY == r) { + buff.put((byte) 0x23); + } else if (r <= -1.0) { + i = buff.position(); + buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */ + e = encodeLargeFloatKey(buff, -r, true, true); + if (e <= 10) buff.put(i, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */ + } else if (r < 0.0) { + buff.put((byte) 0x14); /* Small negative number: 0x14, -E, ~M */ + encodeSmallFloatKey(buff, -r, false, true); + } else if (r < 1.0) { + buff.put((byte) 0x16); /* Small positive number: 0x16, ~-E, M */ + encodeSmallFloatKey(buff, r, true, false); + } else { + i = buff.position(); + buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */ + e = encodeLargeFloatKey(buff, r, false, false); + if (e <= 10) buff.put(i, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */ + } + ord.apply(buff.array(), start, buff.position() +1); + } + + /** + * Decode a Real value. 
+ */ + public static double decodeReal(ByteBuffer buff) { + byte x = buff.get(); + int e = 0; + if (x == 0x06 || x == (byte) 0xf9) { /* NaN */ + return Double.NaN; + } else if (x == 0x07 || x == (byte) 0xf8) { /* -inf */ + return Double.NEGATIVE_INFINITY; + } else if (x == 0x08) { /* Large negative number: 0x08, ~E, ~M */ + e = (int) getVaruint64(buff, true); + return -decodeSignificandReal(buff, e, true); + } else if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */ + e = 0x13 - x; + return -decodeSignificandReal(buff, e, true); + } else if (x == 0x14) { /* Small negative number: 0x14, -E, ~M */ + e = (int) -getVaruint64(buff, false); + return -decodeSignificandReal(buff, e, true); + } else if (x == 0x15 || x == (byte) 0xea) { /* zero */ + return Double.valueOf(0); + } else if (x == 0x16) { /* Small positive number: 0x16, ~-E, M */ + e = (int) -getVaruint64(buff, true); + return decodeSignificandReal(buff, e, false); + } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */ + e = x - 0x17; + return decodeSignificandReal(buff, e, false); + } else if (x == 0x22) { /* Large positive number: 0x22, E, M */ + e = (int) getVaruint64(buff, false); + return decodeSignificandReal(buff, e, false); + } else if (x == 0x23 || x == (byte) 0xdc) { /* +inf */ + return Double.POSITIVE_INFINITY; + } else { + throw new IllegalArgumentException("unexpected value in first byte: 0x" + Long.toHexString(x)); + } + } + + /** + * Encode a String value. + * TODO: assert s does not contain any 0x00 bytes. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) 
+ */ + public static void encodeString(ByteBuffer buff, String s, Order ord) { + int start = buff.position(); + buff.put((byte) 0x24); + buff.put(s.getBytes(Charset.forName("UTF-8"))); + buff.put((byte) 0x00); + ord.apply(buff.array(), start, buff.position() +1); + } + + /** + * Decode a String value. + */ + public static String decodeString(ByteBuffer buff) { + byte header = buff.get(); + assert header == 0x24 || header == (byte) 0xdb; + Order ord = header == 0x24 ? Order.ASCENDING : Order.DESCENDING; + byte[] a = buff.array(); + int offset = buff.position(); + int i = offset; + byte term = (byte) (ord == Order.ASCENDING ? 0x00 : 0xff); + while (a[i] != term) i++; + buff.position(++i); + ord.apply(a, offset, i - offset); + return new String(a, offset, i - offset - 1, Charset.forName("UTF-8")); + } + + /** + * Encode a Blob value, last element in Key. + * + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, + * static int encodeOneKeyValue(...) + */ + public static void encodeBlobLast(ByteBuffer buff, byte[] b, Order ord) { + // Blobs as final entry in a compound key are written unencoded. + assert buff.remaining() >= b.length + 1; + int start = buff.position(); + buff.put((byte) 0x26); + buff.put(b); + ord.apply(buff.array(), start, buff.position() +1); + } + + /** + * Decode a Blob value, last element in Key. + */ + public static byte[] decodeBlobLast(ByteBuffer buff) { + byte header = buff.get(); + assert header == 0x26 || header == (byte) 0xd9; + Order ord = header == 0x26 ? 
Order.ASCENDING : Order.DESCENDING; + int length = buff.limit() - buff.position(); + byte[] ret = new byte[length]; + System.arraycopy(buff.array(), buff.position(), ret, 0, length); + ord.apply(ret, 0, ret.length); + return ret; + } +} diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java new file mode 100644 index 0000000..3e9be24 --- /dev/null +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.apache.hadoop.hbase.util.OrderedBytes.Order.ASCENDING; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestOrderedBytes { + + /** + * Tests the variable uint64 encoding. + *

+ * Building sqlite4 with -DVARINT_TOOL provides this reference:
+ * $ ./varint_tool 240 2287 67823 16777215 4294967295 1099511627775 + * 281474976710655 72057594037927935 18446744073709551615
+ * 240 = f0
+ * 2287 = f8ff
+ * 67823 = f9ffff
+ * 16777215 = faffffff
+ * 4294967295 = fbffffffff
+ * 1099511627775 = fcffffffffff
+ * 281474976710655 = fdffffffffffff
+ * 72057594037927935 = feffffffffffffff
+ * 9223372036854775807 = ff7fffffffffffffff (Long.MAX_VALUE)
+ * 18446744073709551615 = ffffffffffffffffff
+ *

+ * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki + */ + @Test + public void testVaru64Boundaries() { + ByteBuffer buf = ByteBuffer.allocate(9); + int len; + + long boundries[] = + { 240L, 2287L, 67823L, 16777215L, 4294967295L, 1099511627775L, 281474976710655L, + 72057594037927935L, -1L }; + int byte_lens[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + assertEquals("Broken test!", boundries.length, byte_lens.length); + + for (int i = 0; i < boundries.length; i++) { + buf.clear(); + len = OrderedBytes.putVaruint64(buf, boundries[i], false); + assertEquals("Surprising serialized length.", byte_lens[i], len); + buf.flip(); + assertEquals("Deserialization failed.", boundries[i], OrderedBytes.getVaruint64(buf, false)); + buf.flip(); + assertEquals("Length inspection failed.", i + 1, OrderedBytes.lengthVaru64(buf, false)); + + buf.clear(); + len = OrderedBytes.putVaruint64(buf, boundries[i], true); + assertEquals("Surprising serialized length.", byte_lens[i], len); + buf.flip(); + assertEquals("Deserialization failed.", boundries[i], OrderedBytes.getVaruint64(buf, true)); + buf.flip(); + assertEquals("Length inspection failed.", i + 1, OrderedBytes.lengthVaru64(buf, true)); + } + } + + /** + * Test integer encoding. Example input values come from reference wiki + * page. + * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + */ + @Test + public void testInt() { + long[] inputs = + { 0L, 1L, 10L, 99L, 100L, 1234L, 9999L, 10000L, 10001L, 12345L, 123450L, Long.MAX_VALUE }; + ByteBuffer output = ByteBuffer.allocate(18); + for (long in : inputs) { + for (long i : new long[] { in, -in }) { + output.clear(); + OrderedBytes.encodeInt(output, i, ASCENDING); + output.flip(); + assertEquals(i, OrderedBytes.decodeInt(output)); + } + } + } + + /** + * Test real encoding. Example input values come from reference wiki page. 
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki + */ + @Test + public void testReal() { + double[] inputs = + { 0.0, 0.00123, 0.0123, 0.123, 1.0, 10.0, 12.345, 99.0, 99.01, 99.0001, 100.0, 100.01, + 100.1, 1234, 1234.5, 9999, 9999.000001, 9999.000009, 9999.00001, 9999.00009, + 9999.000099, 9999.0001, 9999.001, 9999.01, 9999.1, 10000.0, 10001.0, 12345.0, 123450.0, + Long.MAX_VALUE }; + ByteBuffer output = ByteBuffer.allocate(27); + for (double in : inputs) { + for (double i : new double[] { in, -in }) { + output.clear(); + OrderedBytes.encodeReal(output, i, ASCENDING); + output.flip(); + assertEquals(i, OrderedBytes.decodeReal(output), 0.0001); + } + } + } + + /** + * Test string encoding. + */ + @Test + public void testString() { + String[] inputs = { "foo", "bar", "baz" }; + ByteBuffer output = ByteBuffer.allocate(10); + for (String in : inputs) { + output.clear(); + OrderedBytes.encodeString(output, in, ASCENDING); + output.flip(); + assertEquals(in, OrderedBytes.decodeString(output)); + } + } + + /** + * Test Blob encoding. + */ + @Test + public void testBlobLast() { + byte[][] inputs = { "foo".getBytes(), "bar".getBytes(), "".getBytes() }; + ByteBuffer output = ByteBuffer.allocate(10); + for (byte[] in : inputs) { + output.clear(); + OrderedBytes.encodeBlobLast(output, in, ASCENDING); + output.flip(); + assertArrayEquals(in, OrderedBytes.decodeBlobLast(output)); + } + } +} -- 1.8.1