From 625e33fc90171f163693af2ae7b51ec1c8633f6e Mon Sep 17 00:00:00 2001
From: Nick Dimiduk
+ * Encoding specification is nicked from SQLite4's encoding scheme, hence the
+ * external links.
+ *
+ * Each value is encoded as one or more bytes. The first byte of the encoding,
+ * its meaning, and a terse description of the bytes that follow is given by
+ * the following table:
+ * Summary
+ *
+ *
+ *
+ * Content Type Encoding
+ * NULL 0x05
+ * NaN 0x06
+ * negative infinity 0x07
+ * negative large 0x08, ~E, ~M
+ * negative medium 0x13-E, ~M
+ * negative small 0x14, -E, ~M
+ * zero 0x15
+ * positive small 0x16, ~-E, M
+ * positive medium 0x17+E, M
+ * positive large 0x22, E, M
+ * positive infinity 0x23
+ * text 0x24, T
+ * binary 0x25, B
+ * final binary 0x26, X
+ * Each value that is a NULL encodes as a single byte of 0x05. Since every + * other value encoding begins with a byte greater than 0x05, this forces NULL + * values to sort first. + *
+ *+ * Each text value begins with a single byte of 0x24 and ends with a single + * byte of 0x00. There are zero or more intervening bytes that encode the text + * value. The intervening bytes are chosen so that the encoding will sort in + * the desired collating order. The intervening bytes may not contain a 0x00 + * character; the only 0x00 byte allowed in a text encoding is the final byte. + *
+ *+ * The text encoding ends in 0x00 in order to ensure that when there are two + * strings where one is a prefix of the other that the shorter string will + * sort first. + *
+ *+ * The encoding of binary fields is different depending on whether or not + * the value to be encoded is the last value (the right-most value) in the + * key. + *
+ *+ * Each value that is BINARY that is not the last value of the key begins with + * a single byte of 0x25 and ends with a single byte of 0x00. There are zero + * or more intervening bytes that encode the binary value. None of the + * intervening bytes may be zero, as this conflicts with the termination + * marker. Thus, each of the intervening bytes contains 7 bits of blob content + * with a 1 in the high-order bit (the 0x80 bit). The final byte before the + * 0x00 contains any left-over bits of the blob content. + *
+ *+ * When the very last value of a key is BINARY, then it is encoded as a single + * byte of 0x26 and is followed by a byte-for-byte copy of the BINARY value. + * This alternative encoding is more efficient, but it only works if there are + * no subsequent values in the key, since there is no termination mark on the + * BLOB being encoded. + *
+ *+ * Numeric values must be coded so as to sort in numeric order. We assume that + * numeric values can be both integer and floating point values. + *
+ *+ * Simplest cases first: If the numeric value is a NaN, then the encoding is a + * single byte of 0x06. This causes NaN values to sort prior to every other + * numeric value. This deviates from the natural ordering of Java's Double + * values. A custom comparator that respects this ordering is provided in + * {@link OrderedBytes#REAL_CMP}. The only value that is less than a NaN is a + * NULL. + *
+ *+ * If the numeric value is a negative infinity then the encoding is a single + * byte of 0x07. Since every other numeric value except NaN has a larger + * initial byte, this encoding ensures that negative infinity will sort prior + * to every other numeric value other than NaN. + *
+ *+ * If the numeric value is a positive infinity then the encoding is a single + * byte of 0x23. Every other numeric value encoding begins with a smaller + * byte, ensuring that positive infinity always sorts last among numeric + * values. 0x23 is also smaller than 0x24, the initial byte of a text value, + * ensuring that every numeric value sorts before every text value. + *
+ *+ * If the numeric value is exactly zero then it is encoded as a single byte of + * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14 + * and finite positive values will have initial bytes of 0x16 through 0x22. + *
+ *+ * For all values, we compute a mantissa M and an exponent E. The mantissa is + * a base-100 representation of the value. The exponent E determines where to + * put the decimal point. + *
+ *+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X>=0 and X<=99) then the byte value will be + * 2*X+1 for every byte of the mantissa, except for the last byte which will + * be 2*X+0. The mantissa must be the minimum number of bytes necessary to + * represent the value; trailing X==0 digits are omitted. This means that the + * mantissa will never contain a byte with the value 0x00. + *
+ *+ * If we assume all digits of the mantissa occur to the right of the decimal + * point, then the exponent E is the power of one hundred by which one must + * multiply the mantissa to recover the original value. + *
+ *+ * Values are classified as large, medium, or small according to the value of + * E. If E is 11 or more, the value is large. For E between 0 and 10, the + * value is medium. For E less than zero, the value is small. + *
+ *+ * Large positive values are encoded as a single byte 0x22 followed by E as a + * varint and then M. Medium positive values are a single byte of 0x17+E + * followed by M. Small positive values are encoded as a single byte 0x16 + * followed by the ones-complement of the varint for -E followed by M. + *
+ *+ * Small negative values are encoded as a single byte 0x14 followed by -E as a + * varint and then the ones-complement of M. Medium negative values are + * encoded as a byte 0x13-E followed by the ones-complement of M. Large + * negative values consist of the single byte 0x08 followed by the + * ones-complement of the varint encoding of E followed by the ones-complement + * of M. + *
+ * @see Order. + */ + public int cmp(int cmp) { + return cmp * (this == ASCENDING ? 1 : -1); + } + + /** + * Apply order to the byteb.
+ */
+ public byte apply(byte b) {
+   if (this == ASCENDING) {
+     // ascending order keeps the byte exactly as given
+     return b;
+   }
+   // every other order XORs the byte with the mask
+   return (byte) (b ^ mask);
+ }
+
+ /**
+  * Apply this order to every element of the byte array {@code a}, in place.
+  * Only {@code DESCENDING} alters the array (each element is XORed with the
+  * mask); any other order returns without touching it.
+  */
+ public void apply(byte[] a) {
+   if (this == DESCENDING) {
+     for (int idx = 0; idx < a.length; idx++) {
+       a[idx] ^= mask;
+     }
+   }
+ }
+
+ /**
+ * Apply order to the byte array a according to the Order.
+ */
+ public void apply(byte[] a, int offset, int length) {
+ if (this != DESCENDING) return;
+ for (int i = 0; i < length; i++) {
+ a[offset + i] ^= mask;
+ }
+ }
+
+ @Override
+ public String toString() { return this == ASCENDING ? "asc" : "dsc"; }
+ }
+
+ /**
+ * A Double Comparator that treats NaN as smallest.
+ */
+ public static final Comparatordst as 4 big-endian
+ * bytes.
+ * @return number of bytes written.
+ */
+ private static int putUint32(ByteBuffer dst, int val) {
+   // emit the most significant byte first, one 8-bit slice at a time
+   for (int shift = 24; shift >= 0; shift -= 8) {
+     dst.put((byte) (val >>> shift));
+   }
+   return 4;
+ }
+
+ /**
+  * Encode an unsigned 64-bit integer {@code val} into {@code dst} as a
+  * variable-length sequence of 1 to 9 bytes. The first byte written selects
+  * the width, so smaller values take fewer bytes. Complement the encoded
+  * value when {@code comp} is true.
+  * @param dst destination buffer; it must expose a backing array because the
+  *          complement is applied through {@code dst.array()}.
+  * @param val the value to encode, compared as unsigned.
+  * @param comp when true, the bytes just written are passed through
+  *          {@code Order.DESCENDING}.
+  * @return number of bytes written.
+  */
+ @VisibleForTesting
+ static int putVaruint64(ByteBuffer dst, long val, boolean comp) {
+ int w, y, start = dst.position();
+ // NOTE(review): the backing array is indexed by buffer position below, which
+ // looks like it assumes dst.arrayOffset() == 0 -- confirm against callers.
+ byte[] a = dst.array();
+ Order ord = comp ? Order.DESCENDING : Order.ASCENDING;
+ // val <= 240: the value is its own single byte.
+ if (-1 == unsignedCmp(val, 241L)) {
+ dst.put((byte) val);
+ ord.apply(a, start, 1);
+ return 1;
+ }
+ // val <= 2287: two bytes, the first in the range 241..248.
+ if (-1 == unsignedCmp(val, 2288L)) {
+ y = (int) (val - 240);
+ dst.put((byte) (y / 256 + 241))
+ .put((byte) (y % 256));
+ ord.apply(a, start, 2);
+ return 2;
+ }
+ // val <= 67823: marker 249 followed by (val - 2288) as two big-endian bytes.
+ if (-1 == unsignedCmp(val, 67824L)) {
+ y = (int) (val - 2288);
+ dst.put((byte) 249)
+ .put((byte) (y / 256))
+ .put((byte) (y % 256));
+ ord.apply(a, start, 3);
+ return 3;
+ }
+ // Split into low (y) and high (w) 32-bit halves for the wider forms. The
+ // literal 0xffffffff is the int -1, so the mask itself is a no-op; the (int)
+ // cast is what keeps only the low 32 bits.
+ y = (int) (val & 0xffffffff);
+ w = (int) (val >>> 32);
+ if (w == 0) {
+ // fits in 24 bits: marker 250 plus three big-endian bytes.
+ if (-1 == unsignedCmp(y, 16777216L)) {
+ dst.put((byte) 250)
+ .put((byte) (y >>> 16))
+ .put((byte) (y >>> 8))
+ .put((byte) y);
+ ord.apply(a, start, 4);
+ return 4;
+ }
+ // fits in 32 bits: marker 251 plus four big-endian bytes.
+ dst.put((byte) 251);
+ putUint32(dst, y);
+ ord.apply(a, start, 5);
+ return 5;
+ }
+ // Markers 252..255: one to four bytes of the high half, then all four bytes
+ // of the low half.
+ if (-1 == unsignedCmp(w, 256L)) {
+ dst.put((byte) 252)
+ .put((byte) w);
+ putUint32(dst, y);
+ ord.apply(a, start, 6);
+ return 6;
+ }
+ if (-1 == unsignedCmp(w, 65536L)) {
+ dst.put((byte) 253)
+ .put((byte) (w >>> 8))
+ .put((byte) w);
+ putUint32(dst, y);
+ ord.apply(a, start, 7);
+ return 7;
+ }
+ if (-1 == unsignedCmp(w, 16777216L)) {
+ dst.put((byte) 254)
+ .put((byte) (w >>> 16))
+ .put((byte) (w >>> 8))
+ .put((byte) w);
+ putUint32(dst, y);
+ ord.apply(a, start, 8);
+ return 8;
+ }
+ dst.put((byte) 255);
+ putUint32(dst, w);
+ putUint32(dst, y);
+ ord.apply(a, start, 9);
+ return 9;
+ }
+
+ /**
+ * Inspect an encoded varu64 for it's encoded length. Does not modify
+ * src's state.
+ * @param src source buffer
+ * @param comp if true, parse the compliment of the value.
+ * @return number of bytes consumed by this value
+ */
+ @VisibleForTesting
+ static int lengthVaru64(ByteBuffer src, boolean comp) {
+ byte[] a = src.array();
+ int i = src.position();
+ int a0 = (comp ? a[i] ^ Order.mask : a[i]) & 0xff;
+ if (a0 <= 240) return 1;
+ if (a0 >= 241 && a0 <= 248) return 2;
+ if (a0 == 249) return 3;
+ if (a0 == 250) return 4;
+ if (a0 == 251) return 5;
+ if (a0 == 252) return 6;
+ if (a0 == 253) return 7;
+ if (a0 == 254) return 8;
+ if (a0 == 255) return 9;
+ throw new IllegalArgumentException("unexpected value in first byte: 0x"
+ + Long.toHexString(a[i]));
+ }
+
+ /**
+  * Decode a sequence of bytes in {@code buff} as an unsigned 64-bit integer
+  * and advance the buffer past the bytes read. Complement the encoded value
+  * when {@code comp} is true.
+  * @param buff source buffer, positioned at the first byte of the value.
+  * @param comp when true, each byte is XORed with {@code Order.mask} before
+  *          it is interpreted.
+  * @return the decoded value.
+  */
+ @VisibleForTesting
+ static long getVaruint64(ByteBuffer buff, boolean comp) {
+ assert buff.remaining() >= lengthVaru64(buff, comp);
+ long ret;
+ byte x = buff.get();
+ // a0 is the width-selecting first byte; a1..a8 are the payload bytes, each
+ // read only if the width requires it.
+ int a0 = (comp ? x ^ Order.mask : x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8;
+ // 0..240: the first byte is the value.
+ if (-1 == unsignedCmp(a0, 241)) {
+ return a0;
+ }
+ x = buff.get();
+ a1 = (comp ? x ^ Order.mask : x) & 0xff;
+ // 241..248: two-byte form.
+ if (-1 == unsignedCmp(a0, 249)) {
+ return (a0 - 241) * 256 + a1 + 240;
+ }
+ x = buff.get();
+ a2 = (comp ? x ^ Order.mask : x) & 0xff;
+ // 249: three-byte form, offset by 2288.
+ if (a0 == 249) {
+ return 2288 + 256 * a1 + a2;
+ }
+ x = buff.get();
+ a3 = (comp ? x ^ Order.mask : x) & 0xff;
+ // 250: 24-bit big-endian payload.
+ if (a0 == 250) {
+ return (a1 << 16) | (a2 << 8) | a3;
+ }
+ x = buff.get();
+ a4 = (comp ? x ^ Order.mask : x) & 0xff;
+ // a1 is widened to long before shifting so that bit 31 is not treated as a
+ // sign bit. ret holds the first four payload bytes; wider forms shift it left
+ // and append the remaining bytes.
+ ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4;
+ if (a0 == 251) {
+ return ret;
+ }
+ x = buff.get();
+ a5 = (comp ? x ^ Order.mask : x) & 0xff;
+ if (a0 == 252) {
+ return (ret << 8) | a5;
+ }
+ x = buff.get();
+ a6 = (comp ? x ^ Order.mask : x) & 0xff;
+ if (a0 == 253) {
+ return (ret << 16) | (a5 << 8) | a6;
+ }
+ x = buff.get();
+ a7 = (comp ? x ^ Order.mask : x) & 0xff;
+ if (a0 == 254) {
+ return (ret << 24) | (a5 << 16) | (a6 << 8) | a7;
+ }
+ x = buff.get();
+ a8 = (comp ? x ^ Order.mask : x) & 0xff;
+ // 255: full eight payload bytes; a5 is widened for the same sign-bit reason.
+ return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8;
+ }
+
+ /**
+ * Skip buff over the encoded bytes.
+ */
+ static void skipVaruint64(ByteBuffer buff, boolean comp) {
+ buff.position(buff.position() + lengthVaru64(buff, comp));
+ }
+
+ /**
+  * Encode an integer value into {@code buff} at its current position.
+  * Zero is written as the single byte 0x15. Any other value is written as a
+  * header byte followed by its base-100 significand; negative values use the
+  * complemented forms. The whole encoding is then passed through
+  * {@code ord}.
+  * @param buff destination buffer; it must expose a backing array.
+  * @param v the value to encode. Every {@code long}, including
+  *          {@code Long.MIN_VALUE}, is supported.
+  * @param ord the Order to apply to the encoded bytes.
+  */
+ public static void encodeInt(ByteBuffer buff, long v, Order ord) {
+   int e, i, start = buff.position();
+   if (v == 0) {
+     buff.put((byte) 0x15); /* Numeric zero */
+   } else if (v < 0) {
+     i = buff.position();
+     buff.put((byte) 0x08); /* Large negative number: 0x08, ~E, ~M */
+     // Negate after widening to BigDecimal: in long arithmetic
+     // -Long.MIN_VALUE overflows back to Long.MIN_VALUE, which would hand a
+     // negative magnitude to encodeLargeNumeric.
+     e = encodeLargeNumeric(buff, BigDecimal.valueOf(v).negate(), true, true);
+     if (e <= 10) buff.put(i, (byte) (0x13 - e)); /* Medium negative number: 0x13-E, ~M */
+   } else {
+     i = buff.position();
+     buff.put((byte) 0x22); /* Large positive number: 0x22, E, M */
+     e = encodeLargeNumeric(buff, BigDecimal.valueOf(v), false, false);
+     if (e <= 10) buff.put(i, (byte) (0x17 + e)); /* Medium positive number: 0x17+E, M */
+   }
+   ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+ * Read significand digits from buff according to the magnitude
+ * of e. Uses a double for the accumulator. Treat
+ * encoded bytes as compliments when comp is true.
+ */
+ private static double decodeSignificand(ByteBuffer buff, int e, boolean comp) {
+ byte[] a = buff.array();
+ double m = 0;
+ double p = e - 1;
+ for (int i = buff.position();; i++) {
+ // base-100 digits are encoded as val * 2 + 1 except for the termination digit.
+ m += Math.pow(100.0, p) * (((comp ? a[i] ^ Order.mask : a[i]) & 0xff) / 2);
+ p--;
+ // detect termination digit
+ if (((comp ? a[i] ^ Order.mask : a[i]) & 1) == 0) {
+ buff.position(i + 1);
+ break;
+ }
+ }
+ return m;
+ }
+
+ /**
+ * Skip buff over the significand bytes.
+ */
+ private static void skipSignificand(ByteBuffer buff, boolean comp) {
+ byte[] a = buff.array();
+ for (int i = buff.position();; i++) {
+ if (((comp ? a[i] ^ Order.mask : a[i]) & 1) == 0) {
+ buff.position(i + 1);
+ break;
+ }
+ }
+ }
+
+ /**
+  * Decode an integer value. The backing array is not modified through use of
+  * this method.
+  * <p>
+  * The significand is accumulated in a {@code long}, not a {@code double},
+  * so every value written by {@code encodeInt} is recovered exactly,
+  * including magnitudes above 2^53. Digits to the right of the decimal point
+  * are dropped (truncation toward zero), and magnitudes outside the
+  * {@code long} range saturate at {@code Long.MIN_VALUE} or
+  * {@code Long.MAX_VALUE}.
+  * @param buff source buffer, positioned at the header byte of the value.
+  * @return the decoded value.
+  * @throws IllegalArgumentException if the header byte is neither zero nor a
+  *           medium negative or medium positive number.
+  */
+ public static long decodeInt(ByteBuffer buff) {
+   byte x = buff.get();
+   // descending encodings are complemented, so their header byte is negative
+   boolean dsc = (-1 == Integer.signum(x));
+   if (dsc) x = (byte) ((x ^ Order.mask) & 0xff);
+
+   if (x >= 0x09 && x <= 0x13) { /* Medium negative number: 0x13-E, ~M */
+     return decodeIntegralSignificand(buff, 0x13 - x, !dsc, true);
+   } else if (x == 0x15) { /* Numeric zero */
+     return 0;
+   } else if (x >= 0x17 && x <= 0x21) { /* Medium positive number: 0x17+E, M */
+     return decodeIntegralSignificand(buff, x - 0x17, dsc, false);
+   } else {
+     throw new IllegalArgumentException("unexpected value in first byte: 0x"
+         + Long.toHexString(x));
+   }
+ }
+
+ /**
+  * Read a base-100 significand from {@code buff} and return its integral
+  * part as an exact {@code long}. Only the first {@code e} digits are
+  * integral; later digits are scanned solely to find the terminator.
+  * Negative values are accumulated below zero so that
+  * {@code Long.MIN_VALUE} is reachable.
+  */
+ private static long decodeIntegralSignificand(ByteBuffer buff, int e, boolean comp,
+     boolean negative) {
+   byte[] a = buff.array();
+   long m = 0;
+   int pending = e; // integral base-100 digits not yet accounted for
+   boolean saturated = false;
+   for (int i = buff.position();; i++) {
+     int raw = (comp ? a[i] ^ Order.mask : a[i]) & 0xff;
+     if (pending > 0) {
+       pending--;
+       if (!saturated) {
+         // base-100 digits are encoded as val * 2 + 1 except for the termination digit.
+         int d = raw / 2;
+         if (negative ? m < (Long.MIN_VALUE + d) / 100 : m > (Long.MAX_VALUE - d) / 100) {
+           saturated = true;
+         } else {
+           m = negative ? m * 100 - d : m * 100 + d;
+         }
+       }
+     }
+     // detect termination digit
+     if ((raw & 1) == 0) {
+       buff.position(i + 1);
+       break;
+     }
+   }
+   // trailing zero digits are omitted from the encoding; restore their weight
+   for (; pending > 0 && !saturated; pending--) {
+     if (negative ? m < Long.MIN_VALUE / 100 : m > Long.MAX_VALUE / 100) {
+       saturated = true;
+     } else {
+       m *= 100;
+     }
+   }
+   if (saturated) return negative ? Long.MIN_VALUE : Long.MAX_VALUE;
+   return m;
+ }
+
+ /**
+  * Encode the small positive number {@code dec} using the key encoding: the
+  * base-100 exponent as a varint, followed by the significand M. The caller
+  * writes the header byte beforehand and guarantees that {@code dec} is
+  * greater than 0.0 and less than 1.0 (checked by an assert).
+  * @param buff destination buffer; it must expose a backing array.
+  * @param dec the value to encode, 0.0 &lt; dec &lt; 1.0.
+  * @param ecomp write the complement of the exponent to {@code buff} when
+  *          true.
+  * @param mcomp write the complement of M to {@code buff} when true.
+  */
+ private static void encodeSmallNumeric(ByteBuffer buff, BigDecimal dec, boolean ecomp,
+ boolean mcomp) {
+ // assert 0.0 < dec < 1.0
+ assert BigDecimal.ZERO.compareTo(dec) < 0 && BigDecimal.ONE.compareTo(dec) > 0;
+ int e = 0, d, startM;
+ Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING;
+ // Normalize: shift the decimal point right, first four base-100 places at a
+ // time and then one at a time, counting the places in e. EN10 and EN2 are
+ // declared elsewhere in the file -- presumably 1e-10 and 1e-2; confirm.
+ while (dec.compareTo(EN10) < 0) { dec = dec.movePointRight(8); e += 4; }
+ while (dec.compareTo(EN2) < 0) { dec = dec.movePointRight(2); e++; }
+ putVaruint64(buff, e, ecomp);
+ startM = buff.position();
+ // Emit at most 18 base-100 digits, each written as 2*d+1. E2 is declared
+ // elsewhere in the file -- presumably 100; confirm.
+ for (int i = 0; i < 18 && dec.compareTo(BigDecimal.ZERO) != 0; i++) {
+ dec = dec.multiply(E2);
+ d = dec.intValue();
+ buff.put((byte) ((2 * d + 1) & 0xff));
+ dec = dec.subtract(BigDecimal.valueOf(d));
+ }
+ // Clear the low bit of the last digit written to mark the end of M.
+ buff.array()[buff.position() - 1] &= 0xfe;
+ ord.apply(buff.array(), startM, buff.position() - startM);
+ }
+
+ /**
+  * Encode the large positive number {@code dec} using the key encoding. The
+  * caller writes the header byte beforehand and guarantees that {@code dec}
+  * is greater than or equal to 1.0 (checked by an assert). The exponent is
+  * written as a varint only when it is greater than 10; for smaller
+  * exponents only the significand M is written and the exponent is conveyed
+  * through the return value.
+  * @param buff destination buffer; it must expose a backing array.
+  * @param dec the value to encode, dec &gt;= 1.0.
+  * @param ecomp write the complement of the exponent to {@code buff} when
+  *          true.
+  * @param mcomp write the complement of M to {@code buff} when true.
+  * @return E(xponent) in base-100.
+  */
+ private static int encodeLargeNumeric(ByteBuffer buff, BigDecimal dec, boolean ecomp,
+ boolean mcomp) {
+ // assert dec >= 1.0
+ assert BigDecimal.ONE.compareTo(dec) <= 0;
+ int e = 0, d, startM;
+ Order ord = mcomp ? Order.DESCENDING : Order.ASCENDING;
+ // Normalize below 1.0: shift the decimal point left in steps of 16, 4 and 1
+ // base-100 places, counting the places in e. Each loop also stops once e
+ // exceeds 350. E32 and E8 are declared elsewhere in the file -- presumably
+ // 1e32 and 1e8; confirm.
+ while (dec.compareTo(E32) >= 0 && e <= 350) { dec = dec.movePointLeft(32); e +=16; }
+ while (dec.compareTo(E8) >= 0 && e <= 350) { dec = dec.movePointLeft(8); e+= 4; }
+ while (dec.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { dec = dec.movePointLeft(2); e++; }
+ if (e > 10) putVaruint64(buff, e, ecomp);
+ startM = buff.position();
+ // Emit at most 18 base-100 digits, each written as 2*d+1.
+ for (int i = 0; i < 18 && dec.compareTo(BigDecimal.ZERO) != 0; i++) {
+ dec = dec.movePointRight(2);
+ d = dec.intValue();
+ buff.put((byte) ((2 * d + 1) & 0xff));
+ dec = dec.subtract(BigDecimal.valueOf(d));
+ }
+ // Clear the low bit of the last digit written to mark the end of M.
+ buff.array()[buff.position() - 1] &= 0xfe;
+ ord.apply(buff.array(), startM, buff.position() - startM);
+ return e;
+ }
+
+ /**
+  * Encode a Real value given as a {@code BigDecimal}. Zero becomes the
+  * single byte 0x15; every other value is classified by sign and by whether
+  * its magnitude reaches 1.0, then written as a header byte followed by the
+  * exponent and significand. The whole encoding is finally passed through
+  * {@code ord}.
+  */
+ public static void encodeReal(ByteBuffer buff, BigDecimal r, Order ord) {
+   final int begin = buff.position();
+   if (BigDecimal.ZERO.compareTo(r) == 0) {
+     /* Numeric zero */
+     buff.put((byte) 0x15);
+   } else if (r.compareTo(NEG_ONE) <= 0) {
+     /* r <= -1.0. Large negative number: 0x08, ~E, ~M */
+     final int headerAt = buff.position();
+     buff.put((byte) 0x08);
+     final int exp = encodeLargeNumeric(buff, r.negate(), true, true);
+     if (exp <= 10) {
+       /* Medium negative number: 0x13-E, ~M */
+       buff.put(headerAt, (byte) (0x13 - exp));
+     }
+   } else if (r.compareTo(BigDecimal.ZERO) < 0) {
+     /* -1.0 < r < 0.0. Small negative number: 0x14, -E, ~M */
+     buff.put((byte) 0x14);
+     encodeSmallNumeric(buff, r.negate(), false, true);
+   } else if (r.compareTo(BigDecimal.ONE) < 0) {
+     /* 0.0 < r < 1.0. Small positive number: 0x16, ~-E, M */
+     buff.put((byte) 0x16);
+     encodeSmallNumeric(buff, r, true, false);
+   } else {
+     /* r >= 1.0. Large positive number: 0x22, E, M */
+     final int headerAt = buff.position();
+     buff.put((byte) 0x22);
+     final int exp = encodeLargeNumeric(buff, r, false, false);
+     if (exp <= 10) {
+       /* Medium positive number: 0x17+E, M */
+       buff.put(headerAt, (byte) (0x17 + exp));
+     }
+   }
+   ord.apply(buff.array(), begin, buff.position() - begin);
+ }
+
+ /**
+  * Encode a Real value given as a {@code double}. Zero, NaN and the two
+  * infinities are each written as a single header byte; every other value
+  * is converted with {@code BigDecimal.valueOf} and handed to the
+  * {@code BigDecimal} overload.
+  */
+ public static void encodeReal(ByteBuffer buff, double r, Order ord) {
+   final byte header;
+   if (r == 0.0) {
+     header = (byte) 0x15; /* Numeric zero */
+   } else if (Double.isNaN(r)) {
+     header = (byte) 0x06; /* NaN */
+   } else if (Double.NEGATIVE_INFINITY == r) {
+     header = (byte) 0x07;
+   } else if (Double.POSITIVE_INFINITY == r) {
+     header = (byte) 0x23;
+   } else {
+     encodeReal(buff, BigDecimal.valueOf(r), ord);
+     return;
+   }
+   // single-byte encodings share the same write-then-apply-order tail
+   final int start = buff.position();
+   buff.put(header);
+   ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+  * Decode a Real value into a {@code double}. A negative header byte is
+  * taken to mean the value was complemented; it is XORed with
+  * {@code Order.mask} before being interpreted. The backing array is not
+  * modified through use of this method.
+  * @throws IllegalArgumentException if the header byte is not a numeric
+  *           header.
+  */
+ public static double decodeReal(ByteBuffer buff) {
+   byte header = buff.get();
+   final boolean dsc = header < 0;
+   if (dsc) {
+     header = (byte) ((header ^ Order.mask) & 0xff);
+   }
+
+   // single-byte encodings
+   if (header == 0x06) { /* NaN */
+     return Double.NaN;
+   }
+   if (header == 0x07) { /* -inf */
+     return Double.NEGATIVE_INFINITY;
+   }
+   if (header == 0x15) { /* zero */
+     return 0.0;
+   }
+   if (header == 0x23) { /* +inf */
+     return Double.POSITIVE_INFINITY;
+   }
+
+   // header + exponent + significand encodings
+   final int exp;
+   final boolean negative;
+   if (header == 0x08) { /* Large negative number: 0x08, ~E, ~M */
+     exp = (int) getVaruint64(buff, !dsc);
+     negative = true;
+   } else if (header >= 0x09 && header <= 0x13) { /* Medium negative number: 0x13-E, ~M */
+     exp = 0x13 - header;
+     negative = true;
+   } else if (header == 0x14) { /* Small negative number: 0x14, -E, ~M */
+     exp = (int) -getVaruint64(buff, dsc);
+     negative = true;
+   } else if (header == 0x16) { /* Small positive number: 0x16, ~-E, M */
+     exp = (int) -getVaruint64(buff, !dsc);
+     negative = false;
+   } else if (header >= 0x17 && header <= 0x21) { /* Medium positive number: 0x17+E, M */
+     exp = header - 0x17;
+     negative = false;
+   } else if (header == 0x22) { /* Large positive number: 0x22, E, M */
+     exp = (int) getVaruint64(buff, dsc);
+     negative = false;
+   } else {
+     throw new IllegalArgumentException("unexpected value in first byte: 0x"
+         + Long.toHexString(header));
+   }
+   if (negative) {
+     return -decodeSignificand(buff, exp, !dsc);
+   }
+   return decodeSignificand(buff, exp, dsc);
+ }
+
+ /**
+  * Encode a String value as the header byte 0x24, the bytes of {@code s} in
+  * the {@code UTF8} charset, and a 0x00 terminator, then pass the whole
+  * encoding through {@code ord}.
+  * @throws IllegalArgumentException if {@code s} contains the NUL character,
+  *           which would collide with the terminator.
+  */
+ public static void encodeString(ByteBuffer buff, String s, Order ord) {
+   final boolean hasNul = s.contains("\u0000");
+   if (hasNul) {
+     throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'");
+   }
+   final int begin = buff.position();
+   buff.put((byte) 0x24);
+   buff.put(s.getBytes(UTF8));
+   buff.put((byte) 0x00);
+   final int written = buff.position() - begin;
+   ord.apply(buff.array(), begin, written);
+ }
+
+ /**
+  * Decode a String value and position {@code buff} just past its
+  * terminator. A header of 0x24 is read as ascending; any other header is
+  * read as descending, in which case the payload is copied and the order is
+  * applied to the copy. The backing array is not modified through use of
+  * this method.
+  */
+ public static String decodeString(ByteBuffer buff) {
+   final byte header = buff.get();
+   assert header == 0x24 || header == (byte) 0xdb;
+   final boolean ascending = header == 0x24;
+   final byte terminator = ascending ? (byte) 0x00 : (byte) 0xff;
+   final byte[] bytes = buff.array();
+   final int begin = buff.position();
+   // scan forward to the terminator byte
+   int end = begin;
+   while (bytes[end] != terminator) {
+     end++;
+   }
+   buff.position(end + 1);
+   if (ascending) {
+     return new String(bytes, begin, end - begin, UTF8);
+   }
+   // descending: restore the original bytes on a private copy
+   final byte[] copy = Arrays.copyOfRange(bytes, begin, end);
+   Order.DESCENDING.apply(copy);
+   return new String(copy, UTF8);
+ }
+
+ /**
+ * Calculate the expected blob-mid encoded length based on unencoded length.
+ */
+ @VisibleForTesting
+ static int blobMidEncodedLength(int len) {
+ return
+ ((len * 8) // 8-bits per input byte
+ + 6) // + up to 6 bits of encoded overflow
+ / 7 // 7-bits of input data per encoded byte
+ + 2; // + 1-byte header, + 1-byte footer
+ }
+
+ /**
+ * Calculate the expected blob-mid decoded length based on encoded length.
+ */
+ @VisibleForTesting
+ static int blobMidDecodedLength(int len) {
+ return ((len * 7) - 6) / 8;
+ }
+
+ /**
+  * Encode a Blob value, intermediate element in Key. The value is written as
+  * the header byte 0x25, the content of {@code b} repacked at 7 bits per
+  * output byte with the high bit of every output byte set, and a 0x00
+  * terminator. The whole encoding is then passed through {@code ord}.
+  * @param buff destination buffer; it must expose a backing array.
+  * @param b the bytes to encode.
+  * @param ord the Order to apply to the encoded bytes.
+  */
+ public static void encodeBlobMid(ByteBuffer buff, byte[] b, Order ord) {
+ // Blobs as intermediate entries are encoded as 7-bits per byte, null-terminated.
+ assert buff.remaining() >= blobMidEncodedLength(b.length) : "buffer overflow expected.";
+ int start = buff.position();
+ buff.put((byte) 0x25); /* Blob-mid */
+ // s: how many low bits of the current input byte do not fit in the current
+ // output byte. t: those carried bits, already shifted into place for the
+ // next output byte.
+ byte s = 1, t = 0;
+ for (int i = 0; i < b.length; i++) {
+ // carried bits from the previous input byte plus the top (8 - s) bits of this one
+ buff.put((byte) (0x80 | t | ((b[i] & 0xff) >>> s)));
+ if (s < 7) {
+ t = (byte) (b[i] << (7 - s));
+ s++;
+ } else {
+ // seven bits are pending: they fill an output byte of their own and the
+ // cycle restarts.
+ buff.put((byte) (0x80 | b[i]));
+ s = 1;
+ t = 0;
+ }
+ }
+ // flush any left-over bits of the final input byte
+ if (s > 1) buff.put((byte) (0x80 | t));
+ buff.put((byte) 0x00);
+ ord.apply(buff.array(), start, buff.position() - start);
+ }
+
+ /**
+  * Decode a blob value that was encoded using BlobMid encoding and position
+  * {@code buff} just past its terminator. A header of 0x25 is read as
+  * ascending; any other header is read as descending, in which case every
+  * byte is XORed with {@code Order.mask} before use. The backing array is
+  * not modified through use of this method.
+  * @param buff source buffer, positioned at the header byte of the value.
+  * @return the decoded bytes; an empty array for an empty blob.
+  */
+ public static byte[] decodeBlobMid(ByteBuffer buff) {
+   byte header = buff.get();
+   assert header == 0x25 || header == (byte) 0xda;
+   boolean isDsc = header != 0x25;
+   byte[] a = buff.array();
+   int start = buff.position(), i = start;
+   byte terminator = (byte) (isDsc ? 0xff : 0x00);
+   while (a[i] != terminator) i++;
+   if (i - start == 0) {
+     // skip empty input buffer.
+     buff.get();
+     return new byte[0];
+   }
+   ByteBuffer ret = ByteBuffer.allocate(blobMidDecodedLength(i - start + 1));
+   // t holds the bits still owed to the next output byte; s is the shift that
+   // aligns the next encoded byte underneath them.
+   int s = 6;
+   byte t = (byte) (((isDsc ? a[start] ^ Order.mask : a[start]) << 1) & 0xff);
+   for (i = start + 1; a[i] != terminator; i++) {
+     if (s == 7) {
+       ret.put((byte) (t | ((isDsc ? a[i] ^ Order.mask : a[i]) & 0x7f)));
+       i++;
+       if (a[i] == terminator) {
+         // A full 7-byte cycle ended exactly at the terminator. Stop here:
+         // the loop increment would otherwise step past the terminator and
+         // keep reading whatever follows this value.
+         t = 0;
+         break;
+       }
+     } else {
+       ret.put((byte) (t | (((isDsc ? a[i] ^ Order.mask : a[i]) & 0x7f) >>> s)));
+     }
+     t = (byte) (((isDsc ? a[i] ^ Order.mask : a[i]) << 8 - s) & 0xff);
+     s = s == 1 ? 7 : s - 1;
+   }
+   // i rests on the terminator; move the buffer just past it.
+   buff.position(++i);
+   assert t == 0 : "Unexpected bits remaining after decoding blob.";
+   return ret.array();
+ }
+
+ /**
+  * Encode a Blob value, last element in Key: the header byte 0x26, then
+  * {@code len} bytes of {@code b} starting at {@code offset} copied
+  * verbatim, then a 0x00 terminator. The whole encoding is passed through
+  * {@code ord}.
+  * @throws IllegalArgumentException if the selected range of {@code b}
+  *           contains a 0x00 byte.
+  */
+ public static void encodeBlobLast(ByteBuffer buff, byte[] b, int offset, int len, Order ord) {
+   // Blobs as final entry in a compound key are written unencoded.
+   assert buff.remaining() >= len + 2;
+   // reject input that would collide with the termination marker
+   int idx = offset;
+   while (idx < offset + len) {
+     if (b[idx] == 0x00) {
+       throw new IllegalArgumentException("0x00 bytes not permitted in value.");
+     }
+     idx++;
+   }
+   final int begin = buff.position();
+   buff.put((byte) 0x26);
+   buff.put(b, offset, len);
+   buff.put((byte) 0x00);
+   final int written = buff.position() - begin;
+   ord.apply(buff.array(), begin, written);
+ }
+
+ /**
+  * Encode the whole of {@code b} as a Blob value, last element in Key.
+  */
+ public static void encodeBlobLast(ByteBuffer buff, byte[] b, Order ord) {
+   final int len = b.length;
+   encodeBlobLast(buff, b, 0, len, ord);
+ }
+
+ /**
+  * Decode a Blob value, last element in Key. Everything between the header
+  * byte and the final byte before the limit of {@code buff} is taken as the
+  * value; that final byte is consumed as the termination marker. The backing
+  * array is not modified through use of this method.
+  */
+ public static byte[] decodeBlobLast(ByteBuffer buff) {
+   final byte header = buff.get();
+   assert header == 0x26 || header == (byte) 0xd9;
+   final Order ord;
+   if (header == 0x26) {
+     ord = Order.ASCENDING;
+   } else {
+     ord = Order.DESCENDING;
+   }
+   final byte[] value = new byte[buff.limit() - buff.position() - 1];
+   buff.get(value);
+   buff.get(); // throw away the termination marker.
+   ord.apply(value, 0, value.length);
+   return value;
+ }
+
+ /**
+  * Encode a null value: the single byte 0x05 with {@code ord} applied.
+  */
+ public static void encodeNull(ByteBuffer buff, Order ord) {
+   final byte marker = ord.apply((byte) 0x05);
+   buff.put(marker);
+ }
+
+ /**
+  * Encode a single value into {@code buff} in ascending order, treating it
+  * as the final value of a key.
+  */
+ public static void encode(ByteBuffer buff, Object val) {
+   final boolean isLast = true;
+   encode(buff, val, Order.ASCENDING, isLast);
+ }
+
+ /**
+  * Encode a single value into {@code buff} using the given order, treating
+  * it as the final value of a key.
+  */
+ public static void encode(ByteBuffer buff, Object val, Order ord) {
+   final boolean isLast = true;
+   encode(buff, val, ord, isLast);
+ }
+
+ /**
+  * Encode a single value into {@code buff}, choosing the encoder from the
+  * runtime type of {@code val}. {@code null} is written as a null marker.
+  * Byte, Short, Integer and Long are widened to {@code long}, Character is
+  * encoded by its char value, and Boolean as 1 (true) or 0 (false); all of
+  * these use the integer encoding. Float and Double are widened to
+  * {@code double} and use the real encoding.
+  * TODO: refactor this so that users can register new type encoders.
+  * @param buff the destination.
+  * @param val the object to encode.
+  * @param ord the Order to apply.
+  * @param isLast indicate that this value is final in a sequence of values.
+  * @throws IllegalArgumentException if no encoder handles the type of
+  *           {@code val}.
+  */
+ private static void encode(ByteBuffer buff, Object val, Order ord, boolean isLast) {
+   if (null == val) {
+     encodeNull(buff, ord);
+     return;
+   }
+   // Each boxed type is converted explicitly: a direct (Long) or (Double) cast
+   // throws ClassCastException for every other wrapper class.
+   if (val instanceof Boolean) {
+     encodeInt(buff, ((Boolean) val).booleanValue() ? 1L : 0L, ord);
+     return;
+   }
+   if (val instanceof Character) {
+     encodeInt(buff, ((Character) val).charValue(), ord);
+     return;
+   }
+   if (val instanceof Byte || val instanceof Short
+       || val instanceof Integer || val instanceof Long) {
+     encodeInt(buff, ((Number) val).longValue(), ord);
+     return;
+   }
+   if (val instanceof Float || val instanceof Double) {
+     encodeReal(buff, ((Number) val).doubleValue(), ord);
+     return;
+   }
+   if (val instanceof String) {
+     encodeString(buff, (String) val, ord);
+     return;
+   }
+   if (val instanceof byte[]) {
+     if (isLast) encodeBlobLast(buff, (byte[]) val, ord);
+     else encodeBlobMid(buff, (byte[]) val, ord);
+     return;
+   }
+
+   throw new IllegalArgumentException(
+     "No registered handler for Object of type " + val.getClass().getSimpleName());
+ }
+
+ /**
+  * Encode a sequence of values into a compound key, every value in
+  * ascending order.
+  */
+ public static void encode(ByteBuffer buff, Object[] vals) {
+   final Order[] orders = new Order[vals.length];
+   for (int idx = 0; idx < orders.length; idx++) {
+     orders[idx] = Order.ASCENDING;
+   }
+   encode(buff, vals, orders);
+ }
+
+ /**
+  * Encode a sequence of values into a compound key, applying the same order
+  * to every value.
+  */
+ public static void encode(ByteBuffer buff, Object[] vals, Order ord) {
+   final Order[] orders = new Order[vals.length];
+   for (int idx = 0; idx < orders.length; idx++) {
+     orders[idx] = ord;
+   }
+   encode(buff, vals, orders);
+ }
+
+ /**
+  * Encode a sequence of values into a compound key, pairing each value with
+  * the order at the same index. Only the final value is encoded as the last
+  * element of the key.
+  * @throws IllegalArgumentException if the two arrays differ in length.
+  */
+ public static void encode(ByteBuffer buff, Object[] vals, Order[] orders) {
+   final int count = vals.length;
+   if (count != orders.length) {
+     throw new IllegalArgumentException("vals and orders array lengths do not match.");
+   }
+
+   final int lastIdx = count - 1;
+   for (int idx = 0; idx < count; idx++) {
+     final boolean isLast = idx == lastIdx;
+     encode(buff, vals[idx], orders[idx], isLast);
+   }
+ }
+
+ /**
+ * Decode compound key entries. The backing array is not modified through
+ * use of this method.
+ */
+ public static Object[] decode(ByteBuffer buff) {
+ ArrayList