From ef040e8094d339f8ebb1f3d0db796f63a2c464f9 Mon Sep 17 00:00:00 2001
From: Nick Dimiduk
+ * The encoding specification is borrowed from SQLite4's key encoding scheme,
+ * hence the external links.
+ *
+ * This method is package-private for testing.
+ *
+ * This method is package-private for testing.
+ *
+ * This method is package-private for testing.
+ *
+ * Note:
+ * Order.
+ */
+ /**
+ * Adjust a comparison result for this Order. The value is returned
+ * unchanged for ASCENDING and with its sign reversed for any other Order.
+ *
+ * @param cmp a comparison result in the style of Comparator.compare.
+ * @return cmp itself, or a value of the opposite sign when not ASCENDING.
+ */
+ public int cmp(int cmp) {
+   if (this == ASCENDING) {
+     return cmp;
+   }
+   // -Integer.MIN_VALUE overflows back to Integer.MIN_VALUE and would keep
+   // its negative sign, so map that one input to a positive value explicitly.
+   return cmp == Integer.MIN_VALUE ? Integer.MAX_VALUE : -cmp;
+ }
+
+ /**
+ * Apply this Order to a range of the raw byte array a, in place. Only
+ * DESCENDING changes anything: every byte in the range is XORed with this
+ * Order's mask. Any other Order leaves the array untouched.
+ *
+ * @param a the array to modify.
+ * @param offset index of the first byte in the range.
+ * @param length number of bytes in the range.
+ */
+ public void apply(byte[] a, int offset, int length) {
+   if (this == DESCENDING) {
+     for (int n = 0; n < length; n++) {
+       a[offset + n] ^= this.mask;
+     }
+   }
+ }
+
+ /** Short label for this Order: "asc" for ASCENDING, "dsc" otherwise. */
+ @Override
+ public String toString() {
+   if (this == ASCENDING) {
+     return "asc";
+   }
+   return "dsc";
+ }
+
+ /** Store the mask that apply XORs into each byte of a range. */
+ Order(byte mask) {
+   this.mask = mask;
+ }
+ }
+
+ /**
+ * Compare two long values as though they were unsigned 64-bit integers.
+ * Conforms to the same interface as
+ * {@link Comparator#compare(Object, Object)}.
+ *
+ * @return -1, 0 or 1 as x1 is below, equal to or above x2 when both are
+ *         read as unsigned.
+ */
+ private static int unsignedCmp(long x1, long x2) {
+   if (x1 == x2) {
+     return 0;
+   }
+   final int signedOrder = x1 < x2 ? -1 : 1;
+   // When exactly one value has its top bit set, the unsigned order is the
+   // reverse of the signed order.
+   final boolean signsDiffer = (x1 < 0) != (x2 < 0);
+   return signsDiffer ? -signedOrder : signedOrder;
+ }
+
+ /**
+ * Append the 32 bits of val to dst as 4 bytes, most significant byte
+ * first, using relative puts.
+ *
+ * @param dst the buffer to append to.
+ * @param val the value whose bits are written.
+ * @return number of bytes written, always 4.
+ */
+ private static int putUint32(ByteBuffer dst, int val) {
+   for (int shift = 24; shift >= 0; shift -= 8) {
+     dst.put((byte) (val >>> shift));
+   }
+   return 4;
+ }
+
+ /**
+ * Encode the positive integer m using the key encoding. Appends the
+ * significand M to buff as base-100 digits, most significant first. Each
+ * digit d is stored as 2*d+1, except the final byte, whose low bit is
+ * cleared to mark the end of M. Writes the complement of M when mcomp is
+ * true. ecomp is only handed to putVaruint64, on a branch taken when the
+ * exponent exceeds 10, which the assertion in the body rules out.
+ *
+ * NOTE(review): indexes buff.array() with buffer positions, so this looks
+ * like it assumes an array-backed buffer with arrayOffset() == 0 -- confirm.
+ *
+ * @param buff destination buffer.
+ * @param m the value to encode; asserted to be greater than zero.
+ * @param ecomp complement flag forwarded to putVaruint64 for the exponent.
+ * @param mcomp when true, complement the significand bytes just written.
+ * @return the Exponent, i.e. the number of base-100 digits in m.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int
+ * encodeIntKey(sqlite4_uint64 m, KeyEncoder *p)
+ */
+ private static int encodeIntKey(ByteBuffer buff, long m, boolean ecomp, boolean mcomp) {
+   assert m > 0;
+   // Peel off base-100 digits, least significant first.
+   final byte[] centimalDigits = new byte[20];
+   int count = 0;
+   long remaining = m;
+   do {
+     centimalDigits[count++] = (byte) ((remaining % 100) & 0xff);
+     remaining /= 100;
+   } while (remaining > 0);
+   final int exponent = count;
+   assert exponent >= 1 && exponent <= 10;
+   if (exponent > 10) {
+     putVaruint64(buff, exponent, ecomp);
+   }
+   final int significandStart = buff.position();
+   // Emit the digits most significant first, each with the continuation bit set.
+   for (int idx = count - 1; idx >= 0; idx--) {
+     buff.put((byte) ((centimalDigits[idx] * 2 + 1) & 0xff));
+   }
+   // Clear the continuation bit on the final significand byte.
+   buff.array()[buff.position() - 1] &= 0xfe;
+   if (mcomp) {
+     Order.DESCENDING.apply(buff.array(), significandStart, buff.position() - significandStart);
+   }
+   return exponent;
+ }
+
+ /**
+ * Encode an integer value into buff at its current position, then apply
+ * ord to exactly the bytes this call wrote.
+ *
+ * NOTE(review): indexes buff.array() with buffer positions, so this looks
+ * like it assumes an array-backed buffer with arrayOffset() == 0 -- confirm.
+ *
+ * @param buff destination buffer.
+ * @param v the value to encode.
+ * @param ord the sort order to apply to the encoded bytes.
+ * @throws IllegalArgumentException if v is Long.MIN_VALUE, whose magnitude
+ *         cannot be represented as a positive long.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c,
+ * static int encodeOneKeyValue(...)
+ */
+ public static void encodeInt(ByteBuffer buff, long v, Order ord) {
+   if (v == Long.MIN_VALUE) {
+     // -Long.MIN_VALUE overflows back to Long.MIN_VALUE; without this guard a
+     // corrupt key is written whenever assertions are disabled.
+     throw new IllegalArgumentException("value out of range for integer key encoding: " + v);
+   }
+   final int start = buff.position();
+   if (v == 0) {
+     buff.put((byte) 0x15); /* Numeric zero */
+   } else if (v < 0) {
+     buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */
+     int e = encodeIntKey(buff, -v, true, true);
+     if (e <= 10) buff.put(start, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */
+   } else {
+     buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */
+     int e = encodeIntKey(buff, v, false, false);
+     if (e <= 10) buff.put(start, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */
+   }
+   // The length is the number of bytes written by this call, not an end index:
+   // passing position() + 1 ran past the encoded value for DESCENDING.
+   ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+ * Read significand digits from buff according to the magnitude of e. Uses
+ * a double for the accumulator. Treat encoded bytes as complements when
+ * comp is true. Reading stops after the first byte whose low bit is clear,
+ * and buff is positioned just past that byte.
+ *
+ * Note:
+ *
+ * - separate methods for Int and Real are necessary because double loses
+ *   precision around -Long.MAX_VALUE.
+ * - after roughly 10 significand bytes, the accumulator flips over to
+ *   infinity, thus extremely large values such as Double.MAX_VALUE cannot
+ *   be decoded.
+ *
+ * NOTE(review): indexes buff.array() with buffer positions, so this looks
+ * like it assumes an array-backed buffer with arrayOffset() == 0 -- confirm.
+ *
+ * @param buff source buffer, positioned at the first significand byte.
+ * @param e the exponent; the first digit is weighted by 100^(e-1).
+ * @param comp when true, each byte is complemented before it is interpreted.
+ * @return the decoded magnitude.
+ */
+ private static double decodeSignificandReal(ByteBuffer buff, int e, boolean comp) {
+   final byte[] bytes = buff.array();
+   final int flip = comp ? 0xff : 0x00;
+   double significand = 0;
+   double power = e - 1;
+   int idx = buff.position();
+   boolean more;
+   do {
+     final int encoded = (bytes[idx++] ^ flip) & 0xff;
+     // The upper seven bits carry the base-100 digit.
+     significand += Math.pow(100.0, power) * (encoded / 2);
+     power--;
+     // The low bit says whether another digit follows.
+     more = (encoded & 1) != 0;
+   } while (more);
+   buff.position(idx);
+   return significand;
+ }
+
+ /**
+ * Decode an integer value starting at buff's current position. The header
+ * byte selects the branch: medium negative (0x09..0x13), zero (0x15, or
+ * its complement 0xea) or medium positive (0x17..0x21).
+ *
+ * @param buff source buffer, positioned at the header byte.
+ * @return the decoded value.
+ * @throws IllegalArgumentException if the header byte is none of the above.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c,
+ * static int sqlite4VdbeDecodeIntKey(...)
+ */
+ public static long decodeInt(ByteBuffer buff) {
+   final byte x = buff.get();
+   if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */
+     final int e = 0x13 - x;
+     return (long) -decodeSignificandInt(buff, e, true);
+   } else if (x == 0x15 || x == (byte) 0xea) { /* Numeric zero */
+     // The cast matters: a byte is never equal to the int literal 0xea (234).
+     return 0;
+   } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */
+     final int e = x - 0x17;
+     return (long) decodeSignificandInt(buff, e, false);
+   } else {
+     // Mask to 8 bits so a negative byte prints as two hex digits, not sign-extended.
+     throw new IllegalArgumentException("unexpected value in first byte: 0x" + Integer.toHexString(x & 0xff));
+   }
+ }
+
+ /**
+ * Compare two doubles for equality, within a margin of error. Values are
+ * equal when they are exactly equal or differ by less than 0.00001.
+ */
+ private static boolean doubleEquals(double r, double l) {
+   return r == l || Math.abs(r - l) < 0.00001;
+ }
+
+ /**
+ * Encode the small positive floating point number r using the key encoding.
+ * The caller guarantees that r will be less than 1.0 and greater than 0.0.
+ * r is scaled up until it is at least 0.01, counting the factors of 100 in
+ * the exponent. The exponent is written with putVaruint64, complemented
+ * when ecomp is true. Up to 18 base-100 significand digits follow, stopping
+ * early once the remainder is within doubleEquals' tolerance of zero. The
+ * significand is complemented when mcomp is true.
+ *
+ * @param buff destination buffer.
+ * @param r the value to encode; asserted to be in (0.0, 1.0).
+ * @param ecomp when true, write the complement of the exponent.
+ * @param mcomp when true, write the complement of the significand.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static void
+ * encodeSmallFloatKey(double r, KeyEncoder *p)
+ */
+ private static void encodeSmallFloatKey(ByteBuffer buff, double r, boolean ecomp, boolean mcomp) {
+   assert r > 0.0 && r < 1.0;
+   double rest = r;
+   int exponent = 0;
+   // Coarse scaling first (four base-100 digits at a time), then one at a time.
+   while (rest < 1e-10) {
+     rest *= 1e8;
+     exponent += 4;
+   }
+   while (rest < 0.01) {
+     rest *= 100.0;
+     exponent++;
+   }
+   putVaruint64(buff, exponent, ecomp);
+   final int significandStart = buff.position();
+   int emitted = 0;
+   while (emitted < 18 && !doubleEquals(rest, 0.0)) {
+     rest *= 100.0;
+     final int digit = (int) rest;
+     buff.put((byte) ((2 * digit + 1) & 0xff));
+     rest -= digit;
+     emitted++;
+   }
+   // Clear the continuation bit on the final significand byte.
+   buff.array()[buff.position() - 1] &= 0xfe;
+   if (mcomp) {
+     Order.DESCENDING.apply(buff.array(), significandStart, buff.position() - significandStart);
+   }
+ }
+
+ /**
+ * Encode the large positive floating point number r using the key encoding.
+ * The caller guarantees that r will be finite and greater than or equal to
+ * 1.0. r is scaled down below 1.0, counting the factors of 100 in the
+ * exponent. The exponent is written with putVaruint64 only when it exceeds
+ * 10, complemented when ecomp is true. Up to 18 base-100 significand digits
+ * follow, complemented when mcomp is true.
+ *
+ * @param buff destination buffer.
+ * @param r the value to encode; asserted to be at least 1.0.
+ * @param ecomp when true, write the complement of the exponent.
+ * @param mcomp when true, write the complement of the significand.
+ * @return E(xponent)
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c, static int
+ * encodeLargeFloatKey(double r, KeyEncoder *p)
+ */
+ private static int encodeLargeFloatKey(ByteBuffer buff, double r, boolean ecomp, boolean mcomp) {
+   assert r >= 1.0;
+   double rest = r;
+   int exponent = 0;
+   // Scale down in steps of 16, 4 and 1 base-100 digits; each loop also stops
+   // once the exponent has passed 350.
+   while (rest >= 1e32 && exponent <= 350) {
+     rest *= 1e-32;
+     exponent += 16;
+   }
+   while (rest >= 1e8 && exponent <= 350) {
+     rest *= 1e-8;
+     exponent += 4;
+   }
+   while (rest >= 1.0 && exponent <= 350) {
+     rest *= 0.01;
+     exponent++;
+   }
+   if (exponent > 10) {
+     putVaruint64(buff, exponent, ecomp);
+   }
+   final int significandStart = buff.position();
+   int emitted = 0;
+   while (emitted < 18 && !doubleEquals(rest, 0.0)) {
+     rest *= 100.0;
+     final int digit = (int) rest;
+     buff.put((byte) ((2 * digit + 1) & 0xff));
+     rest -= digit;
+     emitted++;
+   }
+   // Clear the continuation bit on the final significand byte.
+   buff.array()[buff.position() - 1] &= 0xfe;
+   if (mcomp) {
+     Order.DESCENDING.apply(buff.array(), significandStart, buff.position() - significandStart);
+   }
+   return exponent;
+ }
+
+ /**
+ * Encode a Real value into buff at its current position, then apply ord to
+ * exactly the bytes this call wrote. Zero, NaN and the two infinities are
+ * single header bytes. Every other value is a header byte followed by the
+ * output of encodeLargeFloatKey or encodeSmallFloatKey.
+ *
+ * NOTE(review): indexes buff.array() with buffer positions, so this looks
+ * like it assumes an array-backed buffer with arrayOffset() == 0 -- confirm.
+ *
+ * @param buff destination buffer.
+ * @param r the value to encode.
+ * @param ord the sort order to apply to the encoded bytes.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c,
+ * static int encodeOneKeyValue(...)
+ */
+ public static void encodeReal(ByteBuffer buff, double r, Order ord) {
+   final int start = buff.position();
+   if (r == 0.0) {
+     buff.put((byte) 0x15); /* Numeric zero */
+   } else if (Double.isNaN(r)) {
+     buff.put((byte) 0x06); /* NaN */
+   } else if (Double.NEGATIVE_INFINITY == r) {
+     buff.put((byte) 0x07); /* Negative infinity */
+   } else if (Double.POSITIVE_INFINITY == r) {
+     buff.put((byte) 0x23); /* Positive infinity */
+   } else if (r <= -1.0) {
+     buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */
+     int e = encodeLargeFloatKey(buff, -r, true, true);
+     if (e <= 10) buff.put(start, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */
+   } else if (r < 0.0) {
+     buff.put((byte) 0x14); /* Small negative number: 0x14, -E, ~M */
+     encodeSmallFloatKey(buff, -r, false, true);
+   } else if (r < 1.0) {
+     buff.put((byte) 0x16); /* Small positive number: 0x16, ~-E, M */
+     encodeSmallFloatKey(buff, r, true, false);
+   } else {
+     buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */
+     int e = encodeLargeFloatKey(buff, r, false, false);
+     if (e <= 10) buff.put(start, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */
+   }
+   // The length is the number of bytes written by this call, not an end index:
+   // passing position() + 1 ran past the encoded value for DESCENDING.
+   ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+ * Decode a Real value starting at buff's current position. The header byte
+ * selects the branch. NaN, the infinities and zero are also recognised by
+ * their complemented header bytes (0xf9, 0xf8, 0xdc, 0xea); the numeric
+ * branches match only the uncomplemented headers.
+ *
+ * @param buff source buffer, positioned at the header byte.
+ * @return the decoded value.
+ * @throws IllegalArgumentException if the header byte is not recognised.
+ */
+ public static double decodeReal(ByteBuffer buff) {
+   final byte x = buff.get();
+   int e = 0;
+   if (x == 0x06 || x == (byte) 0xf9) { /* NaN */
+     return Double.NaN;
+   } else if (x == 0x07 || x == (byte) 0xf8) { /* -inf */
+     return Double.NEGATIVE_INFINITY;
+   } else if (x == 0x08) { /* Large negative number: 0x08, ~E, ~M */
+     e = (int) getVaruint64(buff, true);
+     return -decodeSignificandReal(buff, e, true);
+   } else if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */
+     e = 0x13 - x;
+     return -decodeSignificandReal(buff, e, true);
+   } else if (x == 0x14) { /* Small negative number: 0x14, -E, ~M */
+     e = (int) -getVaruint64(buff, false);
+     return -decodeSignificandReal(buff, e, true);
+   } else if (x == 0x15 || x == (byte) 0xea) { /* zero */
+     // A plain literal avoids boxing a Double only to unbox it again.
+     return 0.0;
+   } else if (x == 0x16) { /* Small positive number: 0x16, ~-E, M */
+     e = (int) -getVaruint64(buff, true);
+     return decodeSignificandReal(buff, e, false);
+   } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */
+     e = x - 0x17;
+     return decodeSignificandReal(buff, e, false);
+   } else if (x == 0x22) { /* Large positive number: 0x22, E, M */
+     e = (int) getVaruint64(buff, false);
+     return decodeSignificandReal(buff, e, false);
+   } else if (x == 0x23 || x == (byte) 0xdc) { /* +inf */
+     return Double.POSITIVE_INFINITY;
+   } else {
+     // Mask to 8 bits so a negative byte prints as two hex digits, not sign-extended.
+     throw new IllegalArgumentException("unexpected value in first byte: 0x" + Integer.toHexString(x & 0xff));
+   }
+ }
+
+ /**
+ * Encode a String value as the header byte 0x24, the UTF-8 bytes of s and
+ * a 0x00 terminator, then apply ord to exactly the bytes this call wrote.
+ * TODO: assert s does not contain any 0x00 bytes.
+ *
+ * NOTE(review): indexes buff.array() with buffer positions, so this looks
+ * like it assumes an array-backed buffer with arrayOffset() == 0 -- confirm.
+ *
+ * @param buff destination buffer.
+ * @param s the value to encode.
+ * @param ord the sort order to apply to the encoded bytes.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c,
+ * static int encodeOneKeyValue(...)
+ */
+ public static void encodeString(ByteBuffer buff, String s, Order ord) {
+   final int start = buff.position();
+   buff.put((byte) 0x24);
+   buff.put(s.getBytes(Charset.forName("UTF-8")));
+   buff.put((byte) 0x00);
+   // The length is the number of bytes written by this call, not an end index:
+   // passing position() + 1 ran past the terminator for DESCENDING.
+   ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+ * Decode a String value. A header byte of 0x24 selects ASCENDING with a
+ * 0x00 terminator; any other header is treated as DESCENDING with a 0xff
+ * terminator (an assertion expects 0xdb). buff is left positioned just past
+ * the terminator. The scanned bytes are passed through Order.apply in
+ * buff's backing array, so a descending value is un-complemented in place.
+ *
+ * @param buff source buffer, positioned at the header byte.
+ * @return the decoded String, built from the bytes before the terminator.
+ */
+ public static String decodeString(ByteBuffer buff) {
+   final byte header = buff.get();
+   assert header == 0x24 || header == (byte) 0xdb;
+   final Order ord;
+   final byte terminator;
+   if (header == 0x24) {
+     ord = Order.ASCENDING;
+     terminator = (byte) 0x00;
+   } else {
+     ord = Order.DESCENDING;
+     terminator = (byte) 0xff;
+   }
+   final byte[] backing = buff.array();
+   final int first = buff.position();
+   int end = first;
+   while (backing[end] != terminator) {
+     end++;
+   }
+   end++; // step over the terminator
+   buff.position(end);
+   final int encodedLength = end - first;
+   ord.apply(backing, first, encodedLength);
+   return new String(backing, first, encodedLength - 1, Charset.forName("UTF-8"));
+ }
+
+ /**
+ * Encode a Blob value, last element in Key. Writes the header byte 0x26
+ * followed by the bytes of b unchanged, then applies ord to exactly the
+ * bytes this call wrote.
+ *
+ * NOTE(review): indexes buff.array() with buffer positions, so this looks
+ * like it assumes an array-backed buffer with arrayOffset() == 0 -- confirm.
+ *
+ * @param buff destination buffer; asserted to have room for b plus one byte.
+ * @param b the value to encode.
+ * @param ord the sort order to apply to the encoded bytes.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ * @see http://www.sqlite.org/src4/finfo?name=src/vdbecodec.c,
+ * static int encodeOneKeyValue(...)
+ */
+ public static void encodeBlobLast(ByteBuffer buff, byte[] b, Order ord) {
+   // Blobs as final entry in a compound key are written unencoded.
+   assert buff.remaining() >= b.length + 1;
+   final int start = buff.position();
+   buff.put((byte) 0x26);
+   buff.put(b);
+   // The length is the number of bytes written by this call, not an end index:
+   // passing position() + 1 ran past the blob for DESCENDING.
+   ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+ * Decode a Blob value, last element in Key. Consumes the header byte, then
+ * copies every byte from buff's position up to its limit out of the backing
+ * array and applies the Order implied by the header (0x26 is ASCENDING,
+ * anything else DESCENDING) to the copy. buff's position is not advanced
+ * past the blob bytes.
+ *
+ * @param buff source buffer, positioned at the header byte.
+ * @return a new array holding the decoded blob.
+ */
+ public static byte[] decodeBlobLast(ByteBuffer buff) {
+   final byte header = buff.get();
+   assert header == 0x26 || header == (byte) 0xd9;
+   final int from = buff.position();
+   final byte[] blob = new byte[buff.limit() - from];
+   System.arraycopy(buff.array(), from, blob, 0, blob.length);
+   final Order ord = (header != 0x26) ? Order.DESCENDING : Order.ASCENDING;
+   ord.apply(blob, 0, blob.length);
+   return blob;
+ }
+}
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
new file mode 100644
index 0000000..3e9be24
--- /dev/null
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.apache.hadoop.hbase.util.OrderedBytes.Order.ASCENDING;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.SmallTests;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(SmallTests.class)
+public class TestOrderedBytes {
+ // Round-trip checks for OrderedBytes; every encodeXxx call below passes ASCENDING only.
+ /**
+ * Tests the variable uint64 encoding at one boundary value per encoded length (1 to 9 bytes).
+ *
+ * Building sqlite4 with -DVARINT_TOOL provides this reference:
+ * $ ./varint_tool 240 2287 67823 16777215 4294967295 1099511627775
+ * 281474976710655 72057594037927935 18446744073709551615
+ * 240 = f0
+ * 2287 = f8ff
+ * 67823 = f9ffff
+ * 16777215 = faffffff
+ * 4294967295 = fbffffffff
+ * 1099511627775 = fcffffffffff
+ * 281474976710655 = fdffffffffffff
+ * 72057594037927935 = feffffffffffffff
+ * 9223372036854775807 = ff7fffffffffffffff (Long.MAX_VALUE)
+ * 18446744073709551615 = ffffffffffffffffff
+ *
+ * @see http://sqlite.org/src4/doc/trunk/www/varint.wiki
+ */
+ @Test
+ public void testVaru64Boundaries() {
+ ByteBuffer buf = ByteBuffer.allocate(9);
+ int len;
+
+ long boundries[] =
+ { 240L, 2287L, 67823L, 16777215L, 4294967295L, 1099511627775L, 281474976710655L,
+ 72057594037927935L, -1L }; // -1L has the bit pattern of unsigned 18446744073709551615
+ int byte_lens[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ assertEquals("Broken test!", boundries.length, byte_lens.length);
+
+ for (int i = 0; i < boundries.length; i++) {
+ buf.clear();
+ len = OrderedBytes.putVaruint64(buf, boundries[i], false);
+ assertEquals("Surprising serialized length.", byte_lens[i], len);
+ buf.flip();
+ assertEquals("Deserialization failed.", boundries[i], OrderedBytes.getVaruint64(buf, false));
+ buf.flip(); // back to position 0 so the length check re-reads the same bytes
+ assertEquals("Length inspection failed.", i + 1, OrderedBytes.lengthVaru64(buf, false));
+
+ buf.clear();
+ len = OrderedBytes.putVaruint64(buf, boundries[i], true);
+ assertEquals("Surprising serialized length.", byte_lens[i], len);
+ buf.flip();
+ assertEquals("Deserialization failed.", boundries[i], OrderedBytes.getVaruint64(buf, true));
+ buf.flip(); // back to position 0 so the length check re-reads the same bytes
+ assertEquals("Length inspection failed.", i + 1, OrderedBytes.lengthVaru64(buf, true));
+ }
+ }
+
+ /**
+ * Test integer encoding: each input and its negation must round-trip exactly.
+ * Example input values come from the reference wiki page.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ */
+ @Test
+ public void testInt() {
+ long[] inputs =
+ { 0L, 1L, 10L, 99L, 100L, 1234L, 9999L, 10000L, 10001L, 12345L, 123450L, Long.MAX_VALUE };
+ ByteBuffer output = ByteBuffer.allocate(18);
+ for (long in : inputs) {
+ for (long i : new long[] { in, -in }) {
+ output.clear();
+ OrderedBytes.encodeInt(output, i, ASCENDING);
+ output.flip();
+ assertEquals(i, OrderedBytes.decodeInt(output));
+ }
+ }
+ }
+
+ /**
+ * Test real encoding; round-trips must agree within 0.0001. Example inputs come from the reference wiki page.
+ * @see http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki
+ */
+ @Test
+ public void testReal() {
+ double[] inputs =
+ { 0.0, 0.00123, 0.0123, 0.123, 1.0, 10.0, 12.345, 99.0, 99.01, 99.0001, 100.0, 100.01,
+ 100.1, 1234, 1234.5, 9999, 9999.000001, 9999.000009, 9999.00001, 9999.00009,
+ 9999.000099, 9999.0001, 9999.001, 9999.01, 9999.1, 10000.0, 10001.0, 12345.0, 123450.0,
+ Long.MAX_VALUE };
+ ByteBuffer output = ByteBuffer.allocate(27);
+ for (double in : inputs) {
+ for (double i : new double[] { in, -in }) {
+ output.clear();
+ OrderedBytes.encodeReal(output, i, ASCENDING);
+ output.flip();
+ assertEquals(i, OrderedBytes.decodeReal(output), 0.0001);
+ }
+ }
+ }
+
+ /**
+ * Test that short strings survive an encodeString/decodeString round-trip.
+ */
+ @Test
+ public void testString() {
+ String[] inputs = { "foo", "bar", "baz" };
+ ByteBuffer output = ByteBuffer.allocate(10);
+ for (String in : inputs) {
+ output.clear();
+ OrderedBytes.encodeString(output, in, ASCENDING);
+ output.flip();
+ assertEquals(in, OrderedBytes.decodeString(output));
+ }
+ }
+
+ /**
+ * Test that blobs, including the empty blob, survive an encodeBlobLast/decodeBlobLast round-trip.
+ */
+ @Test
+ public void testBlobLast() {
+ byte[][] inputs = { "foo".getBytes(), "bar".getBytes(), "".getBytes() };
+ ByteBuffer output = ByteBuffer.allocate(10);
+ for (byte[] in : inputs) {
+ output.clear();
+ OrderedBytes.encodeBlobLast(output, in, ASCENDING);
+ output.flip();
+ assertArrayEquals(in, OrderedBytes.decodeBlobLast(output));
+ }
+ }
+}
--
1.8.1