From 3fa7ab9a170b40b6f5d6144cff9093a01c868946 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Thu, 1 Aug 2013 13:16:24 -0700 Subject: [PATCH] HBASE-8201 OrderedBytes: order-preserving encoding OrderedBytes provides a data encoding format in which the resulting byte[] retains the same sort order as the natural types. Encoded formats can be inspected and decoded without forward knowledge of their content. Implementations are provided for integer and floating point numbers with 32- and 64-bits of precision, numeric values of arbitrary precision, Strings, and byte[]s. Utility methods for counting and skipping encoded entries are also provided. The encoding format is modeled after the SQLite4 key encoding format. Implementations of variable-length encodings are very similar. fixed-width encodings are modeled after the fixed-width formats provided by the Orderly library. Javadocs on the OrderedBytes class describe the encoding format in detail. See http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki for additional context. Notable deviation from the sqlite4 spec include: - Different header byte values. This is to give users more room to place their own, custom encoding extensions as they see fit. - Blob-last is modified to include a termination byte of 0x00. This is necessary in order to maintain reverse sort order of empty values. It is also renamed to "BlobCopy". - Support for fixed-length integer and float encodings. 
---
 .../java/org/apache/hadoop/hbase/util/Numeric.java |  236 ++++
 .../java/org/apache/hadoop/hbase/util/Order.java   |   76 ++
 .../org/apache/hadoop/hbase/util/OrderedBytes.java | 1286 ++++++++++++++++++++
 .../apache/hadoop/hbase/util/TestOrderedBytes.java |  755 ++++++++++++
 4 files changed, 2353 insertions(+)
 create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/Numeric.java
 create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java
 create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
 create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Numeric.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Numeric.java
new file mode 100644
index 0000000..4d31fe2
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Numeric.java
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Numeric represents a numeric value for use with {@link OrderedBytes}. This
+ * is necessary because {@link BigDecimal} does not support a representation
+ * for NaN or +/-Inf.
+ * <p>
+ * A value is held in exactly one of three forms: a {@code long} when created
+ * from a long or from a {@link BigDecimal} that converts to a long without
+ * loss, a {@code double} when created from a double, and otherwise a
+ * {@link BigDecimal} rounded to {@link #MATH_CONTEXT}.
+ * </p>
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class Numeric extends Number {
+  private static final long serialVersionUID = -4488167747731287844L;
+
+  /**
+   * The context used for numerical operations.
+   */
+  public static final MathContext MATH_CONTEXT = new MathContext(31, RoundingMode.HALF_UP);
+
+  public static final Numeric NaN = new Numeric(Double.NaN);
+  public static final Numeric NEGATIVE_INFINITY = new Numeric(Double.NEGATIVE_INFINITY);
+  public static final Numeric ZERO = new Numeric(0.0);
+  public static final Numeric POSITIVE_INFINITY = new Numeric(Double.POSITIVE_INFINITY);
+
+  // Exactly one representation is active: isR selects r, isZ selects z, and
+  // when both flags are false the value lives in bd.
+  private final boolean isR;
+  private final boolean isZ;
+  private final long z;
+  private final double r;
+  private final BigDecimal bd;
+
+  // Lazily computed hash; 0 means "not computed yet".
+  private transient int hashCode = 0;
+
+  /**
+   * Create a Numeric instance over a {@code double}.
+   */
+  public Numeric(double val) {
+    isR = true;
+    r = val;
+    isZ = false;
+    z = 0;
+    bd = null;
+  }
+
+  /**
+   * Create a Numeric instance over a {@code long}.
+   */
+  public Numeric(long val) {
+    isZ = true;
+    z = val;
+    isR = false;
+    r = 0.0;
+    bd = null;
+  }
+
+  /**
+   * Create a Numeric instance over a {@link BigDecimal}. The value is stored
+   * as a {@code long} when it converts to one exactly; otherwise it is kept
+   * as a {@link BigDecimal} rounded to {@link #MATH_CONTEXT}.
+   * @throws NullPointerException if {@code val} is null.
+   */
+  public Numeric(BigDecimal val) {
+    if (null == val) throw new NullPointerException("val");
+
+    // see if this can be a long instead
+    boolean isLong = false;
+    long lng = 0;
+    try {
+      lng = val.longValueExact();
+      isLong = true;
+    } catch (ArithmeticException ignored) {
+      // has a fractional part or overflows a long; handled below
+    }
+
+    if (isLong) {
+      isZ = true;
+      z = lng;
+      isR = false;
+      r = 0.0;
+      bd = null;
+    } else {
+      // doesn't fit in a long, fall back to BD
+      bd = val.round(MATH_CONTEXT);
+      isZ = false;
+      isR = false;
+      z = 0;
+      r = 0.0;
+    }
+  }
+
+  /**
+   * Returns true if the Number is an Integer and
+   * fits in a {@code long}, false otherwise.
+   */
+  public boolean isInteger() {
+    return isZ;
+  }
+
+  /**
+   * Returns true if the Number is a Real and fits
+   * in a {@code double}, false otherwise.
+   */
+  public boolean isReal() {
+    return isR;
+  }
+
+  /**
+   * Returns true if the Number is infinitely large
+   * in magnitude, false otherwise.
+   */
+  public boolean isInfinite() {
+    return isR && Double.isInfinite(r);
+  }
+
+  /**
+   * Returns true if the Number is a Not-a-Number
+   * (NaN) value, false otherwise.
+   */
+  public boolean isNaN() {
+    return isR && Double.isNaN(r);
+  }
+
+  /**
+   * Retrieve the value as a {@link BigDecimal}. This will silently
+   * promote a {@code double} or {@code long} to a
+   * {@link BigDecimal} when possible, so use it only if a primitive
+   * value is not available. Check availability using {@link #isInteger()} and
+   * {@link #isReal()}.
+   * @throws NumberFormatException if the
+   * Number is infinite or NaN.
+   */
+  public BigDecimal exactValue() {
+    return null == bd ? isR ? BigDecimal.valueOf(r) : BigDecimal.valueOf(z) : bd;
+  }
+
+  @Override
+  public double doubleValue() {
+    return isReal() ? r : isInteger() ? (double) z : bd.doubleValue();
+  }
+
+  @Override
+  public int intValue() {
+    return isInteger() ? (int) z : isReal() ? (int) r : bd.intValue();
+  }
+
+  @Override
+  public long longValue() {
+    return isInteger() ? z : isReal() ? (long) r : bd.longValue();
+  }
+
+  @Override
+  public float floatValue() {
+    return isReal() ? (float) r : isInteger() ? (float) z : bd.floatValue();
+  }
+
+  @Override
+  public String toString() {
+    return isReal() ? Double.toString(r) : isInteger() ? Long.toString(z) : bd.toString();
+  }
+
+  /**
+   * Compares by numeric value, regardless of which representation backs each
+   * side. Two double-backed values are compared with {@code ==}, so a NaN is
+   * equal only to the very same instance. A NaN or infinite value never
+   * equals a long- or BigDecimal-backed value.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof Numeric)) return false;
+    Numeric that = (Numeric) o;
+    if (this.isZ && that.isZ) return this.z == that.z;
+    if (this.isR && that.isR) return this.r == that.r;
+    // exactValue() throws for NaN and +/-Inf, and no finite value equals them
+    if (!this.isFinite() || !that.isFinite()) return false;
+    return 0 == this.exactValue().compareTo(that.exactValue());
+  }
+
+  /**
+   * Hashes the numeric value rather than its representation, so that values
+   * which are equal across representations (for example the long 1 and the
+   * double 1.0) share a hash code, as {@link #equals(Object)} requires.
+   */
+  @Override
+  public int hashCode() {
+    if (0 != hashCode) return hashCode;
+    int result;
+    if (!isFinite()) {
+      // NaN and +/-Inf only ever equal other doubles; hash the raw bits
+      long bits = Double.doubleToLongBits(r);
+      result = (int) (bits ^ (bits >>> 32));
+    } else {
+      BigDecimal v = exactValue();
+      // stripTrailingZeros() yields one canonical form per non-zero value;
+      // zero is handled separately so that zeros of any scale hash alike
+      result = 0 == v.signum() ? 0 : v.stripTrailingZeros().hashCode();
+    }
+    hashCode = result;
+    return result;
+  }
+
+  /** Returns false only for double-backed NaN and +/-Inf values. */
+  private boolean isFinite() {
+    return !isR || !(Double.isNaN(r) || Double.isInfinite(r));
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java
new file mode 100644
index 0000000..1123528
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Order.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Used to describe or modify the lexicographical sort order of a byte[].
+ * Default ordering is considered ASCENDING. The order of a byte[] can be
+ * inverted, resulting in DESCENDING order, by replacing each byte with its
+ * 1's complement.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public enum Order {
+  ASCENDING, DESCENDING;
+
+  private static final byte mask = (byte) 0xff;
+
+  /**
+   * Returns the adjusted trichotomous value according to the ordering imposed
+   * by this Order: unchanged for ASCENDING, sign-flipped for DESCENDING.
+   */
+  public int cmp(int cmp) {
+    if (this == ASCENDING) return cmp;
+    // -Integer.MIN_VALUE overflows back to itself, so clamp to keep the sign flip
+    return cmp == Integer.MIN_VALUE ? Integer.MAX_VALUE : -cmp;
+  }
+
+  /**
+   * Apply order to the byte val: its 1's complement when DESCENDING.
+   */
+  public byte apply(byte val) {
+    return (byte) (this == ASCENDING ? val : val ^ mask);
+  }
+
+  /**
+   * Apply order to the byte array val, in place.
+   */
+  public void apply(byte[] val) {
+    if (this == DESCENDING) apply(val, 0, val.length);
+  }
+
+  /**
+   * Apply order, in place, to the <code>length</code> bytes of
+   * <code>val</code> that start at <code>offset</code>.
+   */
+  public void apply(byte[] val, int offset, int length) {
+    if (this != DESCENDING) return;
+    for (int i = 0; i < length; i++) {
+      val[offset + i] ^= mask;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return this == ASCENDING ? "ASCENDING" : "DESCENDING";
+  }
+}
+
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
new file mode 100644
index 0000000..9b82562
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
@@ -0,0 +1,1286 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.apache.hadoop.hbase.util.Order.ASCENDING;
+import static org.apache.hadoop.hbase.util.Order.DESCENDING;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Utility class that handles ordered byte arrays. That is, unlike
+ * {@link Bytes}, these methods produce byte arrays which maintain the sort
+ * order of the original values.
+ *

Encoding Format summary

+ *

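+ * Each value is encoded as one or more bytes. The first byte of the encoding,
+ * its meaning, and a terse description of the bytes that follow are given by
+ * the following table:
+ * <table>
+ * <tr><th>Content Type</th><th>Encoding</th></tr>
+ * <tr><td>NULL</td><td>0x05</td></tr>
+ * <tr><td>negative infinity</td><td>0x07</td></tr>
+ * <tr><td>negative large</td><td>0x08, ~E, ~M</td></tr>
+ * <tr><td>negative medium</td><td>0x13-E, ~M</td></tr>
+ * <tr><td>negative small</td><td>0x14, -E, ~M</td></tr>
+ * <tr><td>zero</td><td>0x15</td></tr>
+ * <tr><td>positive small</td><td>0x16, ~-E, M</td></tr>
+ * <tr><td>positive medium</td><td>0x17+E, M</td></tr>
+ * <tr><td>positive large</td><td>0x22, E, M</td></tr>
+ * <tr><td>positive infinity</td><td>0x23</td></tr>
+ * <tr><td>NaN</td><td>0x25</td></tr>
+ * <tr><td>fixed-length 32-bit integer</td><td>0x27, I</td></tr>
+ * <tr><td>fixed-length 64-bit integer</td><td>0x28, I</td></tr>
+ * <tr><td>fixed-length 32-bit float</td><td>0x30, F</td></tr>
+ * <tr><td>fixed-length 64-bit float</td><td>0x31, F</td></tr>
+ * <tr><td>text</td><td>0x33, T</td></tr>
+ * <tr><td>variable binary</td><td>0x35, B</td></tr>
+ * <tr><td>copy binary</td><td>0x36, X</td></tr>
+ * </table>
+ *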

+ * + *

Null Encoding

+ *

+ * Each value that is a NULL encodes as a single byte of 0x05. Since every + * other value encoding begins with a byte greater than 0x05, this forces NULL + * values to sort first. + *

+ *

Text Encoding

+ *

+ * Each text value begins with a single byte of 0x33 and ends with a single + * byte of 0x00. There are zero or more intervening bytes that encode the text + * value. The intervening bytes are chosen so that the encoding will sort in + * the desired collating order. The intervening bytes may not contain a 0x00 + * character; the only 0x00 byte allowed in a text encoding is the final byte. + *

+ *

+ * The text encoding ends in 0x00 in order to ensure that when there are two + * strings where one is a prefix of the other that the shorter string will + * sort first. + *

+ *

Binary Encoding

+ *

+ * There are two encoding strategies for binary fields, referred to as + * "BlobVar" and "BlobCopy". BlobVar is less efficient in both space and + * encoding time. It has no limitations on the range of encoded values. + * BlobCopy is a byte-for-byte copy of the input data followed by a + * termination byte. It is extremely fast to encode and decode. It carries the + * restriction of not allowing a 0x00 value in the input byte[] as this value + * is used as the termination byte. + *

+ *

BlobVar

+ *

+ * "BlobVar" encodes the input byte[] in a manner similar to a variable length + * integer encoding. As with the other OrderedBytes encodings, + * the first encoded byte is used to indicate what kind of value follows. This + * header byte is 0x35 for BlobVar encoded values. As with the traditional + * varint encoding, the most significant bit of each subsequent encoded + * byte is used as a continuation marker. The 7 remaining bits + * contain the 7 most significant bits of the first unencoded byte. The next + * encoded byte starts with a continuation marker in the MSB. The least + * significant bit from the first unencoded byte follows, and the remaining 6 + * bits contain the 6 MSBs of the second unencoded byte. The encoding + * continues, encoding 7 bytes on to 8 encoded bytes. The MSB of the final + * encoded byte contains a termination marker rather than a continuation + * marker, and any remaining bits from the final input byte. Any trailing bits + * in the final encoded byte are zeros. + *

+ *

BlobCopy

+ *

+ * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x36 + * as the header byte, and is terminated by 0x00. This alternative encoding is + * more efficient, but it cannot accept values containing a 0x00 byte. + *

+ *

Variable-length Numeric Encoding

+ *

+ * Numeric values must be coded so as to sort in numeric order. We assume that + * numeric values can be both integer and floating point values. The wrapper + * class {@link Numeric} is used to smooth over values decoded using this + * scheme. + *

+ *

+ * Simplest cases first: If the numeric value is a NaN, then the encoding is a + * single byte of 0x25. This causes NaN values to sort after every other + * numeric value. + *

+ *

+ * If the numeric value is a negative infinity then the encoding is a single + * byte of 0x07. Since every other numeric value, NaN included, has a larger + * initial byte, this encoding ensures that negative infinity will sort prior + * to every other numeric value. + *

+ *

+ * If the numeric value is a positive infinity then the encoding is a single + * byte of 0x23. Every other numeric value encoding except NaN (0x25) begins + * with a smaller byte, ensuring that positive infinity sorts last among + * numeric values other than NaN. 0x23 is also smaller than 0x33, the initial + * byte of a text value, ensuring that every numeric value sorts before every + * text value. + *

+ *

+ * If the numeric value is exactly zero then it is encoded as a single byte of + * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14 + * and finite positive values will have initial bytes of 0x16 through 0x22. + *

+ *

+ * For all numeric values, we compute a mantissa M and an exponent E. The + * mantissa is a base-100 representation of the value. The exponent E + * determines where to put the decimal point. + *

+ *

+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X≥0 and X≤99) then the byte value will + * be 2*X+1 for every byte of the mantissa, except for the last byte which + * will be 2*X+0. The mantissa must be the minimum number of bytes necessary + * to represent the value; trailing X==0 digits are omitted. This means that + * the mantissa will never contain a byte with the value 0x00. + *

+ *

+ * If we assume all digits of the mantissa occur to the right of the decimal + * point, then the exponent E is the power of one hundred by which one must + * multiply the mantissa to recover the original value. + *

+ *

+ * Values are classified as large, medium, or small according to the value of + * E. If E is 11 or more, the value is large. For E between 0 and 10, the + * value is medium. For E less than zero, the value is small. + *

+ *

+ * Large positive values are encoded as a single byte 0x22 followed by E as a + * varint and then M. Medium positive values are a single byte of 0x17+E + * followed by M. Small positive values are encoded as a single byte 0x16 + * followed by the ones-complement of the varint for -E followed by M. + *

+ *

+ * Small negative values are encoded as a single byte 0x14 followed by -E as a + * varint and then the ones-complement of M. Medium negative values are + * encoded as a byte 0x13-E followed by the ones-complement of M. Large + * negative values consist of the single byte 0x08 followed by the + * ones-complement of the varint encoding of E followed by the ones-complement + * of M. + *

+ *

Fixed-length Integer Encoding

+ *

+ * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte + * format. All 8-byte integers are serialized to the equivalent 9-byte format. + * Serialization is performed by writing a header byte, inverting the integer + * sign bit and writing the resulting bytes to the byte array in big endian + * order. + *

+ *

Fixed-length Floating Point Encoding

+ *

+ * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte + * encoding format, respectively. The format is identical, save for the + * precision respected in each step of the operation. + *

+ * This format ensures the following total ordering of floating point values: + * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... < + * -Float.MIN_VALUE < -0.0 < +0.0 < Float.MIN_VALUE < ... < + * Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN + *

+ *

+ * Floating point numbers are encoded as specified in IEEE 754. A 32-bit + * single precision float consists of a sign bit, 8-bit unsigned exponent + * encoded in offset-127 notation, and a 23-bit significand. The format is + * described further in the Single Precision + * Floating Point Wikipedia page + *

+ *

+ * The value of a normal float is (-1)<sup>sign bit</sup> × + * 2<sup>exponent - 127</sup> × 1.significand + *

+ *

+ * The IEEE 754 floating point format already preserves sort ordering for + * positive floating point numbers when the raw bytes are compared in most + * significant byte order. This is discussed further at + * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + *

+ *

+ * Thus, we need only ensure that negative numbers sort in the exact + * opposite order as positive numbers (so that say, negative infinity is less + * than negative 1), and that all negative numbers compare less than any + * positive number. To accomplish this, we invert the sign bit of all floating + * point numbers, and we also invert the exponent and significand bits if the + * floating point number was negative. + *

+ *

+ * More specifically, we first store the floating point bits into a 32-bit int + * j using {@link Float#floatToIntBits}. This method collapses + * all NaNs into a single, canonical NaN value but otherwise leaves the bits + * unchanged. We then compute + *

+ * + *
+ * j ^= (j >> (Integer.SIZE - 1)) | Integer.MIN_VALUE
+ * 
+ *

+ * which inverts the sign bit and XOR's all other bits with the sign bit + * itself. Comparing the raw bytes of j in most significant byte + * order is equivalent to performing a single precision floating point + * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't + * compare equal to anything when performing floating point comparisons). + *

+ *

+ * The resulting integer is then converted into a byte array by serializing + * the integer one byte at a time in most significant byte order. The + * serialized integer is prefixed by a single header byte. All serialized + * values are 5 bytes in length. + *

+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedBytes {
+
+  /*
+   * Header bytes (and the terminator byte) shared by the encoding
+   * implementations in this class.
+   */
+
+  public static final byte TERM = 0x00;
+  public static final byte NULL = 0x05;
+  public static final byte NEG_INF = 0x07;
+  public static final byte NEG_LARGE = 0x08;
+  public static final byte NEG_MED_MIN = 0x09;
+  public static final byte NEG_MED_MAX = 0x13;
+  public static final byte NEG_SMALL = 0x14;
+  public static final byte ZERO = 0x15;
+  public static final byte POS_SMALL = 0x16;
+  public static final byte POS_MED_MIN = 0x17;
+  public static final byte POS_MED_MAX = 0x21;
+  public static final byte POS_LARGE = 0x22;
+  public static final byte POS_INF = 0x23;
+  public static final byte NAN = 0x25;
+  public static final byte FIXED_INT32 = 0x27;
+  public static final byte FIXED_INT64 = 0x28;
+  public static final byte FIXED_FLOAT32 = 0x30;
+  public static final byte FIXED_FLOAT64 = 0x31;
+  public static final byte TEXT = 0x33;
+  public static final byte BLOB_VAR = 0x35;
+  public static final byte BLOB_COPY = 0x36;
+
+  public static final Charset UTF8 = Charset.forName("UTF-8");
+
+  // thresholds used while normalizing numeric values during {en,de}coding
+  private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
+  private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
+  private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
+  private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
+
+  /**
+   * Compare two long values as though they were unsigned. Follows the
+   * contract of {@link Comparator#compare(Object, Object)} and always returns
+   * one of -1, 0 or 1.
+   */
+  private static int unsignedCmp(long x1, long x2) {
+    // flipping the sign bit maps unsigned order onto signed order
+    final long lhs = x1 ^ Long.MIN_VALUE;
+    final long rhs = x2 ^ Long.MIN_VALUE;
+    if (lhs == rhs) return 0;
+    return lhs < rhs ? -1 : 1;
+  }
+
+  /**
+   * Write the 32 bits of <code>val</code> to <code>dst</code> as 4 bytes,
+   * most significant byte first.
+   * @return incremented offset.
+   */
+  private static int putUint32(byte[] dst, int offset, int val) {
+    for (int shift = 24; shift >= 0; shift -= 8) {
+      dst[offset++] = (byte) (val >>> shift);
+    }
+    return offset;
+  }
+
+  /**
+   * Encode <code>val</code>, treated as an unsigned 64-bit integer, into
+   * <code>dst</code> using between 1 and 9 bytes. Every written byte is
+   * complemented when <code>comp</code> is true.
+   * @return incremented offset.
+   */
+  @VisibleForTesting
+  static int putVaruint64(byte[] dst, int offset, long val, boolean comp) {
+    final int start = offset;
+    if (-1 == unsignedCmp(val, 241L)) {
+      // 0..240: the value is its own single byte
+      dst[offset++] = (byte) val;
+    } else if (-1 == unsignedCmp(val, 2288L)) {
+      // 241..2287: header 241..248 plus one payload byte
+      final int rem = (int) (val - 240);
+      dst[offset++] = (byte) (rem / 256 + 241);
+      dst[offset++] = (byte) (rem % 256);
+    } else if (-1 == unsignedCmp(val, 67824L)) {
+      // 2288..67823: header 249 plus two payload bytes
+      final int rem = (int) (val - 2288);
+      dst[offset++] = (byte) 249;
+      dst[offset++] = (byte) (rem / 256);
+      dst[offset++] = (byte) (rem % 256);
+    } else {
+      // everything larger: header 250..255 announces 3..8 big-endian bytes
+      final int width = Math.max(3, 8 - Long.numberOfLeadingZeros(val) / 8);
+      dst[offset++] = (byte) (247 + width);
+      for (int shift = 8 * (width - 1); shift >= 0; shift -= 8) {
+        dst[offset++] = (byte) (val >>> shift);
+      }
+    }
+    (comp ? DESCENDING : ASCENDING).apply(dst, start, offset - start);
+    return offset;
+  }
+
+  /**
+   * Advance <code>offset</code> past one encoded varuint64 value. Only the
+   * first byte is inspected; it alone determines the encoded length.
+   * @param src source buffer
+   * @param offset position in <code>src</code> at which to start.
+   * @param comp if true, parse the complement of the value.
+   * @return updated value of <code>offset</code>
+   */
+  @VisibleForTesting
+  static int skipVaruint64(byte[] src, int offset, boolean comp) {
+    final int head = (comp ? DESCENDING : ASCENDING).apply(src[offset]) & 0xff;
+    if (head <= 240) return offset + 1;
+    if (head <= 248) return offset + 2;
+    // headers 249..255 are followed by 2..8 payload bytes
+    if (head <= 255) return offset + (head - 246);
+    throw new IllegalArgumentException("unexpected value in first byte: 0x"
+        + Long.toHexString(src[offset]));
+  }
+
+  /**
+   * Decode the bytes of <code>buff</code> starting at <code>offset</code> as
+   * an unsigned 64-bit integer. Each byte is complemented before it is
+   * interpreted when <code>comp</code> is true.
+   * @return the decoded value.
+   */
+  @VisibleForTesting
+  static long getVaruint64(byte[] buff, int offset, boolean comp) {
+    assert skipVaruint64(buff, offset, comp) <= buff.length;
+    final Order ord = comp ? DESCENDING : ASCENDING;
+    final int head = ord.apply(buff[offset++]) & 0xff;
+    if (head <= 240) {
+      return head;
+    }
+    if (head <= 248) {
+      return (head - 241) * 256 + (ord.apply(buff[offset]) & 0xff) + 240;
+    }
+    if (head == 249) {
+      final int hi = ord.apply(buff[offset++]) & 0xff;
+      final int lo = ord.apply(buff[offset]) & 0xff;
+      return 2288 + 256 * hi + lo;
+    }
+    // headers 250..255 carry 3..8 payload bytes in big-endian order
+    long value = 0;
+    for (int remaining = head - 247; remaining > 0; remaining--) {
+      value = (value << 8) | (ord.apply(buff[offset++]) & 0xff);
+    }
+    return value;
+  }
+
+  /**
+   * Read centimal significand digits from <code>buff</code>, weighting the
+   * first digit according to <code>e</code> and each following digit one
+   * power of 100 lower.
+   * @param buff The source from which to read encoded digits.
+   * @param offset The position in <code>buff</code> of the first digit.
+   * @param e The magnitude of the first digit read.
+   * @param comp Treat encoded bytes as complements when <code>comp</code> is true.
+   * @return The decoded value.
+   */
+  private static BigDecimal decodeSignificand(byte[] buff, int offset, int e, boolean comp) {
+    // TODO: can this be made faster?
+    final Order ord = comp ? DESCENDING : ASCENDING;
+    BigDecimal sum = BigDecimal.ZERO;
+    int scale = (e - 1) * -2; // the first digit is worth 100 ^ (e - 1)
+    int encoded;
+    do {
+      // digits are stored as 2x+1, except for the terminal digit which is 2x
+      encoded = ord.apply(buff[offset++]) & 0xff;
+      sum = sum.add(
+        new BigDecimal(BigInteger.ONE, scale).multiply(BigDecimal.valueOf(encoded / 2)));
+      scale += 2;
+    } while ((encoded & 1) != 0); // a clear low bit marks the terminal digit
+    return sum;
+  }
+
+  /**
+   * Advance <code>offset</code> past the significand bytes in
+   * <code>buff</code>, stopping after the terminal digit.
+   * @return updated offset.
+   */
+  private static int skipSignificand(byte[] buff, int offset, boolean comp) {
+    final Order ord = comp ? DESCENDING : ASCENDING;
+    byte current;
+    do {
+      current = ord.apply(buff[offset++]);
+    } while ((current & 1) != 0);
+    return offset;
+  }
+
+  /**
+   * Encode the small magnitude floating point number <code>val</code> using
+   * the key encoding. The caller guarantees that 1.0 > abs(val) > 0.0.
+   *

+ * A floating point value is encoded as an integer exponent E + * and a mantissa M. The original value is equal to + * (M * 100^E). E is set to the smallest value + * possible without making M greater than or equal to 1.0. + *

+ *

+ * For this routine, E will always be zero or negative, since + * the original value is less than one. The encoding written by this routine + * is the ones-complement of the varint of the negative of E + * followed by the mantissa: + * + *

+   *   Encoding:   ~-E  M
+   * 
+ *

+   * @param buff The destination to which encoded digits are written.
+   * @param offset The position in <code>buff</code> at which to start writing.
+   * @param val The value to encode.
+   * @return incremented offset.
+   */
+  private static int encodeNumericSmall(byte[] buff, int offset, BigDecimal val) {
+    // TODO: this can be done faster?
+    BigDecimal rem = val.abs();
+    // assert 1.0 > abs(val) > 0.0
+    assert BigDecimal.ZERO.compareTo(rem) < 0 && BigDecimal.ONE.compareTo(rem) > 0;
+    final boolean negative = val.signum() == -1;
+
+    // Small negative number: 0x14, -E, ~M; small positive number: 0x16, ~-E, M
+    buff[offset++] = negative ? NEG_SMALL : POS_SMALL;
+
+    // normalize abs(val) to determine E, four centimal digits at a time first
+    int exp = 0;
+    while (rem.compareTo(EN10) < 0) {
+      rem = rem.movePointRight(8);
+      exp += 4;
+    }
+    while (rem.compareTo(EN2) < 0) {
+      rem = rem.movePointRight(2);
+      exp++;
+    }
+
+    // the exponent is complemented for positive values only
+    offset = putVaruint64(buff, offset, exp, !negative);
+
+    // encode M by peeling off at most 18 centimal digits, encoding x as 2x+1
+    final int mantissaStart = offset;
+    int budget = 18;
+    while (budget-- > 0 && rem.signum() != 0) {
+      rem = rem.movePointRight(2);
+      final int digit = rem.intValue();
+      buff[offset++] = (byte) ((2 * digit + 1) & 0xff);
+      rem = rem.subtract(BigDecimal.valueOf(digit));
+    }
+    buff[offset - 1] &= 0xfe; // terminal digit should be 2x
+    if (negative) {
+      // negative values encoded as ~M
+      DESCENDING.apply(buff, mantissaStart, offset - mantissaStart);
+    }
+    return offset;
+  }
+
+  /**
+   * Encode the large magnitude floating point number <code>val</code> using
+   * the key encoding. The caller guarantees that <code>val</code> will be
+   * finite and abs(val) >= 1.0.
+   *

+ * A floating point value is encoded as an integer exponent E + * and a mantissa M. The original value is equal to + * (M * 100^E). E is set to the smallest value + * possible without making M greater than or equal to 1.0. + *

+ *

+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X>=0 and + * X<=99) then the byte value will be 2*X+1 for + * every byte of the mantissa, except for the last byte which will be + * 2*X+0. The mantissa must be the minimum number of bytes + * necessary to represent the value; trailing X==0 digits are + * omitted. This means that the mantissa will never contain a byte with the + * value 0x00. + *

+ *

+ * If E > 10, then this routine writes E as a + * varint followed by the mantissa as described above. Otherwise, if + * E <= 10, this routine only writes the mantissa and leaves + * the E value to be encoded as part of the opening byte of the + * field by the calling function. + * + *

+   *   Encoding:  M       (if E<=10)
+   *              E M     (if E>10)
+   * 
+ *

+ * @param buff The destination to which encoded digits are written. + * @param val The value to encode. + * @return updated offset. + */ + private static int encodeNumericLarge(byte[] buff, int offset, BigDecimal val) { + // TODO: this can be done faster + // assert abs(val) >= 1.0 + BigDecimal abs = val.abs(); + assert BigDecimal.ONE.compareTo(abs) <= 0; + boolean isNeg = val.signum() == -1; + int e = 0, d, header = offset, startM; + + if (isNeg) { /* Large negative number: 0x08, ~E, ~M */ + buff[offset++] = NEG_LARGE; + } else { /* Large positive number: 0x22, E, M */ + buff[offset++] = POS_LARGE; + } + + // normalize abs(val) to determine E + while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; } + while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; } + while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; } + + // encode appropriate header byte and/or E value. + if (e > 10) { /* large number, write out {~,}E */ + offset = putVaruint64(buff, offset, e, isNeg); + } else { + if (isNeg) { /* Medium negative number: 0x13-E, ~M */ + buff[header] = (byte) (NEG_MED_MAX - e); + } else { /* Medium positive number: 0x17+E, M */ + buff[header] = (byte) (POS_MED_MIN + e); + } + } + + // encode M by peeling off centimal digits, encoding x as 2x+1 + startM = offset; + for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) { + abs = abs.movePointRight(2); + d = abs.intValue(); + buff[offset++] = (byte) (2 * d + 1); + abs = abs.subtract(BigDecimal.valueOf(d)); + } + + buff[offset - 1] &= 0xfe; // terminal digit should be 2x + if (isNeg) DESCENDING.apply(buff, startM, offset - startM); // negative values encoded as ~M + return offset; + } + + /** + * Encode a numerical value using the variable-length encoding. + * @return updated offset. 
+ */ + public static int encodeNumeric(byte[] buff, int offset, long val, Order ord) { + return encodeNumeric(buff, offset, BigDecimal.valueOf(val), ord); + } + + /** + * Encode a numerical value using the variable-length encoding. + * @return updated offset. + */ + public static int encodeNumeric(byte[] buff, int offset, double val, Order ord) { + if (Double.isNaN(val)) { + buff[offset++] = ord.apply(NAN); + return offset; + } else if (val == Double.NEGATIVE_INFINITY) { + buff[offset++] = ord.apply(NEG_INF); + return offset; + } else if (val == Double.POSITIVE_INFINITY) { + buff[offset++] = ord.apply(POS_INF); + return offset; + } else if (val == 0.0) { + buff[offset++] = ord.apply(ZERO); + return offset; + } else { + return encodeNumeric(buff, offset, BigDecimal.valueOf(val), ord); + } + } + + /** + * Encode a numerical value using the variable-length encoding. + * @return updated offset. + */ + public static int encodeNumeric(byte[] buff, int offset, BigDecimal val, Order ord) { + int start = offset; + if (null == val) { + return encodeNull(buff, offset, ord); + } else if (BigDecimal.ZERO.compareTo(val) == 0) { + buff[offset++] = ord.apply(ZERO); + return offset; + } + BigDecimal abs = val.abs(); + if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0 + offset = encodeNumericLarge(buff, offset, val); + } else { // 1.0 > abs(v) > 0.0 + offset = encodeNumericSmall(buff, offset, val); + } + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Encode a numerical value using the variable-length encoding. + * @return updated offset. 
+ */ + public static int encodeNumeric(byte[] buff, int offset, Numeric val, Order ord) { + if (null == val) { + return encodeNull(buff, offset, ord); + } else if (val.isInteger()) { + return encodeNumeric(buff, offset, val.longValue(), ord); + } else if (val.isReal()) { + return encodeNumeric(buff, offset, val.doubleValue(), ord); + } else { + return encodeNumeric(buff, offset, val.exactValue(), ord); + } + } + + /** + * Decode a Numerical value from the variable-length encoding. The backing + * array is not modified through use of this method. + */ + public static Numeric decodeNumeric(byte[] buff, int offset) { + byte header = buff[offset++]; + if (header == NULL || header == DESCENDING.apply(NULL)) + return null; + int e = 0; + boolean dsc = (-1 == Integer.signum(header)); + if (dsc) header = DESCENDING.apply(header); + + if (header == NAN) { + return Numeric.NaN; + } else if (header == NEG_INF) { + return Numeric.NEGATIVE_INFINITY; + } else if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */ + e = (int) getVaruint64(buff, offset, !dsc); + offset = skipVaruint64(buff, offset, !dsc); + return new Numeric(decodeSignificand(buff, offset, e, !dsc).negate()); + } else if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) { + /* Medium negative number: 0x13-E, ~M */ + e = NEG_MED_MAX - header; + return new Numeric(decodeSignificand(buff, offset, e, !dsc).negate()); + } else if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */ + e = (int) -getVaruint64(buff, offset, dsc); + offset = skipVaruint64(buff, offset, dsc); + return new Numeric(decodeSignificand(buff, offset, e, !dsc).negate()); + } else if (header == ZERO) { + return Numeric.ZERO; + } else if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */ + e = (int) -getVaruint64(buff, offset, !dsc); + offset = skipVaruint64(buff, offset, !dsc); + return new Numeric(decodeSignificand(buff, offset, e, dsc)); + } else if (header >= POS_MED_MIN && header <= POS_MED_MAX) { + /* 
Medium positive number: 0x17+E, M */ + e = header - POS_MED_MIN; + return new Numeric(decodeSignificand(buff, offset, e, dsc)); + } else if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */ + e = (int) getVaruint64(buff, offset, dsc); + offset = skipVaruint64(buff, offset, dsc); + return new Numeric(decodeSignificand(buff, offset, e, dsc)); + } else if (header == POS_INF) { + return Numeric.POSITIVE_INFINITY; + } else { + throw new IllegalArgumentException("unexpected value in first byte: 0x" + + Long.toHexString(header)); + } + } + + /** + * Encode a String value. + * @return updated offset. + */ + public static int encodeString(byte[] buff, int offset, String val, Order ord) { + if (null == val) { + return encodeNull(buff, offset, ord); + } + if (val.contains("\u0000")) + throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'"); + int start = offset; + // TODO: is there no way to decode into buff directly? + byte[] bytes = val.getBytes(UTF8); + buff[offset++] = TEXT; + System.arraycopy(bytes, 0, buff, offset, bytes.length); + offset += bytes.length; + buff[offset++] = TERM; + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Decode a String value. The backing array is not modified through use of + * this method. + */ + public static String decodeString(byte[] buff, int offset) { + byte header = buff[offset++]; + if (header == NULL || header == DESCENDING.apply(NULL)) + return null; + assert header == TEXT || header == DESCENDING.apply(TEXT); + Order ord = header == TEXT ? ASCENDING : DESCENDING; + int start = offset; + byte terminator = ord.apply(TERM); + while (buff[offset++] != terminator) ; + if (DESCENDING == ord) { + byte[] copy = Arrays.copyOfRange(buff, start, offset - 1); + ord.apply(copy); + return new String(copy, UTF8); + } else { + return new String(buff, start, offset - start - 1, UTF8); + } + } + + /** + * Calculate the expected BlobVar encoded length based on unencoded length. 
+ */ + public static int blobVarEncodedLength(int len) { + if (0 == len) + return 2; // 1-byte header + 1-byte terminator + else + return (int) + Math.ceil( + (len * 8) // 8-bits per input byte + / 7.0) // 7-bits of input data per encoded byte, rounded up + + 1; // + 1-byte header + } + + /** + * Calculate the expected BlobVar decoded length based on encoded length. + */ + @VisibleForTesting + static int blobVarDecodedLength(int len) { + return + ((len + - 1) // 1-byte header + * 7) // 7-bits of payload per encoded byte + / 8; // 8-bits per byte + } + + /** + * Encode a Blob value using a modified varint encoding scheme. + *

+ * This format encodes a byte[] value such that no limitations on the input + * value are imposed. The first byte encodes the encoding scheme that + * follows, 0x35. Each encoded byte thereafter has a header bit indicating + * whether there is another encoded byte following. A header bit of '1' + * indicates continuation of the encoding. A header bit of '0' indicates + * this byte encodes the final byte. An empty input value is a special case, + * wherein a NULL byte is used as a termination byte. The remaining 7 bits + * on each encoded byte carry the value payload. + *

+ * @return updated offset. + */ + public static int + encodeBlobVar(byte[] buff, int offset, byte[] val, int voff, int vlen, Order ord) { + if (null == val) { + return encodeNull(buff, offset, ord); + } + // Empty value is null-terminated. All other values are encoded as 7-bits per byte. + assert buff.length - offset >= blobVarEncodedLength(vlen) : "buffer overflow expected."; + int start = offset; + buff[offset++] = BLOB_VAR; + if (0 == vlen) { + buff[offset++] = TERM; + } else { + byte s = 1, t = 0; + for (int i = voff; i < vlen; i++) { + buff[offset++] = (byte) (0x80 | t | ((val[i] & 0xff) >>> s)); + if (s < 7) { + t = (byte) (val[i] << (7 - s)); + s++; + } else { + buff[offset++] = (byte) (0x80 | val[i]); + s = 1; + t = 0; + } + } + if (s > 1) { + buff[offset++] = (byte) (0x7f & t); + } else { + buff[offset - 1] &= 0x7f; + } + } + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Encode a blob value using a modified varint encoding scheme. + * @return updated offset. + */ + public static int encodeBlobVar(byte[] buff, int offset, byte[] val, Order ord) { + return encodeBlobVar(buff, offset, val, 0, null != val ? val.length : 0, ord); + } + + /** + * Decode a blob value that was encoded using BlobVar encoding. The backing + * array is not modified through use of this method. + */ + public static byte[] decodeBlobVar(byte[] buff, int offset) { + byte header = buff[offset++]; + if (header == NULL || header == DESCENDING.apply(NULL)) + return null; + assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR); + Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING; + int start = offset, end; + if (buff[start] == ord.apply(TERM)) { + // skip empty input buffer. 
+ return new byte[0]; + } + for (end = start; (byte) (ord.apply(buff[end]) & 0x80) != TERM; end++) ; + end++; // increment end to 1-past last byte + // create ret buffer using length of encoded data + 1 (header byte) + byte[] ret = new byte[blobVarDecodedLength(end - start + 1)]; + int roff = 0, s = 6; + byte t = (byte) ((ord.apply(buff[start]) << 1) & 0xff); + for (int i = start + 1; i < end; i++) { + if (s == 7) { + ret[roff++] = (byte) (t | (ord.apply(buff[i]) & 0x7f)); + i++; + } else { + ret[roff++] = (byte) (t | ((ord.apply(buff[i]) & 0x7f) >>> s)); + } + if (i == end) break; + t = (byte) ((ord.apply(buff[i]) << 8 - s) & 0xff); + s = s == 1 ? 7 : s - 1; + } + assert t == 0 : "Unexpected bits remaining after decoding blob."; + return ret; + } + + /** + * Encode a Blob value as a byte-for-byte copy. + * @return updated offset. + */ + public static int encodeBlobCopy(byte[] buff, int offset, byte[] val, int voff, int vlen, + Order ord) { + if (null == val) { + offset = encodeNull(buff, offset, ord); + if (DESCENDING == ord) { + // DESCENDING ordered BlobCopy requires a termination bit to preserve + // sort-order semantics of null values. + buff[offset++] = ord.apply(TERM); + } + return offset; + } + // Blobs as final entry in a compound key are written unencoded. + int overhead = ASCENDING == ord ? 1 : 2; + assert buff.length - offset >= vlen + overhead; + for (int i = voff; i < voff + vlen; i++) { + if (val[i] == 0x00) + throw new IllegalArgumentException("0x00 bytes not permitted in value."); + } + int start = offset; + buff[offset++] = BLOB_COPY; + System.arraycopy(val, voff, buff, offset, vlen); + offset += vlen; + // DESCENDING ordered BlobCopy requires a termination bit to preserve + // sort-order semantics of null values. + if (DESCENDING == ord) buff[offset++] = TERM; + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Encode a Blob value as a byte-for-byte copy. + * @return updated offset. 
+ */ + public static int encodeBlobCopy(byte[] buff, int offset, byte[] val, Order ord) { + return encodeBlobCopy(buff, offset, val, 0, null != val ? val.length : 0, ord); + } + + /** + * Decode a Blob value, byte-for-byte copy. The backing array is not + * modified through use of this method. + */ + public static byte[] decodeBlobCopy(byte[] buff, int offset) { + byte header = buff[offset++]; + if (header == NULL || header == DESCENDING.apply(NULL)) { + return null; + } + + assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY); + Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING; + int length = buff.length - offset - (ASCENDING == ord ? 0 : 1); + byte[] ret = new byte[length]; + System.arraycopy(buff, offset, ret, 0, length); + ord.apply(ret, 0, ret.length); + return ret; + } + + /** + * Encode a null value. + * @return updated offset. + */ + public static int encodeNull(byte[] buff, int offset, Order ord) { + buff[offset++] = ord.apply(NULL); + return offset; + } + + /** + * Encode an int32 value using the fixed-length encoding. + * @return updated offset. + */ + public static int encodeInt32(byte[] buff, int offset, int val, Order ord) { + int start = offset; + buff[offset++] = FIXED_INT32; + buff[offset++] = (byte) ((val >> 24) ^ 0x80); + buff[offset++] = (byte) (val >> 16); + buff[offset++] = (byte) (val >> 8); + buff[offset++] = (byte) val; + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Decode an int32 value. The backing array is not modified + * through use of this method. + */ + public static int decodeInt32(byte[] buff, int offset) { + byte header = buff[offset++]; + assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32); + Order ord = header == 0x27 ? 
ASCENDING : DESCENDING; + int val = (ord.apply(buff[offset++]) ^ 0x80) & 0xff; + for (int i = 1; i < 4; i++) { + val = (val << 8) + (ord.apply(buff[offset++]) & 0xff); + } + return val; + } + + /** + * Encode an int64 value using the fixed-length encoding. + * @return updated offset. + */ + public static int encodeInt64(byte[] buff, int offset, long val, Order ord) { + int start = offset; + buff[offset++] = FIXED_INT64; + buff[offset++] = (byte) ((val >> 56) ^ 0x80); + buff[offset++] = (byte) (val >> 48); + buff[offset++] = (byte) (val >> 40); + buff[offset++] = (byte) (val >> 32); + buff[offset++] = (byte) (val >> 24); + buff[offset++] = (byte) (val >> 16); + buff[offset++] = (byte) (val >> 8); + buff[offset++] = (byte) val; + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Decode an int64 value. The backing array is not modified + * through use of this method. + */ + public static long decodeInt64(byte[] buff, int offset) { + byte header = buff[offset++]; + assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64); + Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING; + long val = (ord.apply(buff[offset++]) ^ 0x80) & 0xff; + for (int i = 1; i < 8; i++) { + val = (val << 8) + (ord.apply(buff[offset++]) & 0xff); + } + return val; + } + + /** + * Encode a 32-bit floating point value using the fixed-length encoding. + * @return updated offset. + * @see #decodeFloat32(byte[], int) + */ + public static int encodeFloat32(byte[] buff, int offset, float val, Order ord) { + int start = offset; + int i = Float.floatToIntBits(val); + i ^= ((i >> Integer.SIZE - 1) | Integer.MIN_VALUE); + buff[offset++] = FIXED_FLOAT32; + buff[offset++] = (byte) (i >> 24); + buff[offset++] = (byte) (i >> 16); + buff[offset++] = (byte) (i >> 8); + buff[offset++] = (byte) i; + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Decode a 32-bit floating point value using the fixed-length encoding. 
+ * @see #encodeFloat32(byte[], int, float, Order) + */ + public static float decodeFloat32(byte[] buff, int offset) { + byte header = buff[offset++]; + assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32); + Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING; + int val = ord.apply(buff[offset++]) & 0xff; + for (int i = 1; i < 4; i++) { + val = (val << 8) + (ord.apply(buff[offset++]) & 0xff); + } + val ^= (~val >> Integer.SIZE - 1) | Integer.MIN_VALUE; + return Float.intBitsToFloat(val); + } + + /** + * Encode a 64-bit floating point value using the fixed-length encoding. + *

+ * This format ensures the following total ordering of floating point + * values: Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... < + * -Double.MIN_VALUE < -0.0 < +0.0 < Double.MIN_VALUE < ... + * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN + *

+ * Floating point numbers are encoded as specified in IEEE 754. A 64-bit + * double precision float consists of a sign bit, 11-bit unsigned exponent + * encoded in offset-1023 notation, and a 52-bit significand. The format is + * described further in the Double Precision + * Floating Point Wikipedia page.

+ *

+ * The value of a normal float is (-1)^(sign bit) × + * 2^(exponent - 1023) × 1.significand + *

+ *

+ * The IEEE 754 floating point format already preserves sort ordering for + * positive floating point numbers when the raw bytes are compared in most + * significant byte order. This is discussed further at + * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm + *

+ *

+ * Thus, we need only ensure that negative numbers sort in the exact + * opposite order as positive numbers (so that say, negative infinity is + * less than negative 1), and that all negative numbers compare less than + * any positive number. To accomplish this, we invert the sign bit of all + * floating point numbers, and we also invert the exponent and significand + * bits if the floating point number was negative. + *

+ *

+ * More specifically, we first store the floating point bits into a 64-bit + * long l using {@link Double#doubleToLongBits}. This method + * collapses all NaNs into a single, canonical NaN value but otherwise + * leaves the bits unchanged. We then compute + *

+ * + *
+   * l ^= (l >> (Long.SIZE - 1)) | Long.MIN_VALUE
+   * 
+ *

+ * which inverts the sign bit and XOR's all other bits with the sign bit + * itself. Comparing the raw bytes of l in most significant + * byte order is equivalent to performing a double precision floating point + * comparison on the underlying bits (ignoring NaN comparisons, as NaNs + * don't compare equal to anything when performing floating point + * comparisons). + *

+ *

+ * The resulting long integer is then converted into a byte array by + * serializing the long one byte at a time in most significant byte order. + * The serialized integer is prefixed by a single header byte. All + * serialized values are 9 bytes in length. + *

+ * @return updated offset. + */ + public static int encodeFloat64(byte[] buff, int offset, double val, Order ord) { + int start = offset; + long lng = Double.doubleToLongBits(val); + lng ^= ((lng >> Long.SIZE - 1) | Long.MIN_VALUE); + buff[offset++] = FIXED_FLOAT64; + buff[offset++] = (byte) (lng >> 56); + buff[offset++] = (byte) (lng >> 48); + buff[offset++] = (byte) (lng >> 40); + buff[offset++] = (byte) (lng >> 32); + buff[offset++] = (byte) (lng >> 24); + buff[offset++] = (byte) (lng >> 16); + buff[offset++] = (byte) (lng >> 8); + buff[offset++] = (byte) lng; + ord.apply(buff, start, offset - start); + return offset; + } + + /** + * Decode a 64-bit floating point value using the fixed-length encoding. + * @see #encodeFloat64(byte[], int, double, Order) + */ + public static double decodeFloat64(byte[] buff, int offset) { + byte header = buff[offset++]; + assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64); + Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING; + long val = ord.apply(buff[offset++]) & 0xff; + for (int i = 1; i < 8; i++) { + val = (val << 8) + (ord.apply(buff[offset++]) & 0xff); + } + val ^= (~val >> Long.SIZE - 1) | Long.MIN_VALUE; + return Double.longBitsToDouble(val); + } + + /** + * Skip offset forward over one encoded value. + * @return updated offset. + */ + public static int skip(byte[] buff, int offset) { + byte x = buff[offset++]; + Order ord = (-1 == Integer.signum(x)) ? 
DESCENDING : ASCENDING; + x = ord.apply(x); + + switch (x) { + case NULL: + case NEG_INF: + return offset; + case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */ + offset = skipVaruint64(buff, offset, DESCENDING != ord); + return skipSignificand(buff, offset, DESCENDING != ord); + case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */ + case NEG_MED_MIN + 0x01: + case NEG_MED_MIN + 0x02: + case NEG_MED_MIN + 0x03: + case NEG_MED_MIN + 0x04: + case NEG_MED_MIN + 0x05: + case NEG_MED_MIN + 0x06: + case NEG_MED_MIN + 0x07: + case NEG_MED_MIN + 0x08: + case NEG_MED_MIN + 0x09: + case NEG_MED_MAX: + return skipSignificand(buff, offset, DESCENDING != ord); + case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */ + offset = skipVaruint64(buff, offset, DESCENDING == ord); + return skipSignificand(buff, offset, DESCENDING != ord); + case ZERO: + return offset; + case POS_SMALL: /* Small positive number: 0x16, ~-E, M */ + offset = skipVaruint64(buff, offset, DESCENDING != ord); + return skipSignificand(buff, offset, DESCENDING == ord); + case POS_MED_MIN: /* Medium positive number: 0x17+E, M */ + case POS_MED_MIN + 0x01: + case POS_MED_MIN + 0x02: + case POS_MED_MIN + 0x03: + case POS_MED_MIN + 0x04: + case POS_MED_MIN + 0x05: + case POS_MED_MIN + 0x06: + case POS_MED_MIN + 0x07: + case POS_MED_MIN + 0x08: + case POS_MED_MIN + 0x09: + case POS_MED_MAX: + return skipSignificand(buff, offset, DESCENDING == ord); + case POS_LARGE: /* Large positive number: 0x22, E, M */ + offset = skipVaruint64(buff, offset, DESCENDING == ord); + return skipSignificand(buff, offset, DESCENDING == ord); + case POS_INF: + return offset; + case NAN: + return offset; + case FIXED_INT32: + return offset + 4; + case FIXED_INT64: + return offset + 8; + case FIXED_FLOAT32: + return offset + 4; + case FIXED_FLOAT64: + return offset + 8; + case TEXT: + // for null-terminated values, skip to the end. 
+ do { + x = ord.apply(buff[offset++]); + } while (x != TERM); + return offset; + case BLOB_VAR: + // read until we find a 0 in the MSB + do { + x = ord.apply(buff[offset++]); + } while ((byte) (x & 0x80) != TERM); + return offset; + case BLOB_COPY: + if (Order.DESCENDING == ord) { + // if descending, read to termination byte. + do { + x = ord.apply(buff[offset++]); + } while (x != TERM); + return offset; + } else { + // otherwise, just skip to the end. + return buff.length; + } + default: + throw new IllegalArgumentException("unexpected value in first byte: 0x" + + Long.toHexString(x)); + } + } + + /** + * Return the number of encoded entries remaining in buff. The + * state of buff is not modified through use of this method. + */ + public static int length(byte[] buff, int offset) { + int cnt = 0; + for (; offset != buff.length; offset = skip(buff, offset), cnt++) ; + return cnt; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java new file mode 100644 index 0000000..a950c27 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestOrderedBytes.java @@ -0,0 +1,755 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.math.BigDecimal; +import java.util.Arrays; +import java.util.Collections; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestOrderedBytes { + + // integer constants for testing Numeric code paths + static final Long[] I_VALS = + { 0L, 1L, 10L, 99L, 100L, 1234L, 9999L, 10000L, 10001L, 12345L, 123450L, Long.MAX_VALUE }; + static final int[] I_LENGTHS = { 1, 2, 2, 2, 2, 3, 3, 2, 4, 4, 4, 11 }; + + // real constants for testing Numeric code paths + static final Double[] D_VALS = + { 0.0, 0.00123, 0.0123, 0.123, 1.0, 10.0, 12.345, 99.0, 99.01, 99.0001, 100.0, 100.01, + 100.1, 1234.0, 1234.5, 9999.0, 9999.000001, 9999.000009, 9999.00001, 9999.00009, + 9999.000099, 9999.0001, 9999.001, 9999.01, 9999.1, 10000.0, 10001.0, 12345.0, 123450.0, + Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.NaN, Double.MAX_VALUE }; + static final int[] D_LENGTHS = + { 1, 4, 4, 4, 2, 2, 4, 2, 3, 4, 2, 4, + 4, 3, 4, 3, 6, 6, 6, 6, + 6, 5, 5, 4, 4, 2, 4, 4, 4, + 1, 1, 1, 11 }; + + // fill in other gaps in Numeric code paths + static final Numeric[] N_VALS = + { null, new Numeric(Long.MAX_VALUE), new Numeric(Long.MIN_VALUE), + new Numeric(Double.MAX_VALUE), new Numeric(Double.MIN_VALUE), + new Numeric(BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100))) }; + static final int[] N_LENGTHS = + { 1, 11, 11, 11, 4, 12 }; + + /* + * This is the smallest difference between two doubles in D_VALS + */ + static final double MIN_EPSILON = 0.000001; + + /** + * Expected lengths of equivalent values should match + */ + @Test + public void testVerifyTestIntegrity() { 
+ for (int i = 0; i < I_VALS.length; i++) { + for (int d = 0; d < D_VALS.length; d++) { + if (Math.abs(I_VALS[i] - D_VALS[d]) < MIN_EPSILON) { + assertEquals( + "Test inconsistency detected: expected lengths for " + I_VALS[i] + " do not match.", + I_LENGTHS[i], D_LENGTHS[d]); + } + } + } + } + + /** + * Tests the variable uint64 encoding. + *

+ * Building sqlite4 with -DVARINT_TOOL provides this reference:
+ * $ ./varint_tool 240 2287 67823 16777215 4294967295 1099511627775 + * 281474976710655 72057594037927935 18446744073709551615
+ * 240 = f0
+ * 2287 = f8ff
+ * 67823 = f9ffff
+ * 16777215 = faffffff
+ * 4294967295 = fbffffffff
+ * 1099511627775 = fcffffffffff
+ * 281474976710655 = fdffffffffffff
+ * 72057594037927935 = feffffffffffffff
+ * 9223372036854775807 = ff7fffffffffffffff (Long.MAX_VALUE)
+ * 9223372036854775808 = ff8000000000000000 (Long.MIN_VALUE)
+ * 18446744073709551615 = ffffffffffffffffff
+ *

+ */ + @Test + public void testVaru64Boundaries() { + long vals[] = + { 239L, 240L, 2286L, 2287L, 67822L, 67823L, 16777214L, 16777215L, 4294967294L, 4294967295L, + 1099511627774L, 1099511627775L, 281474976710654L, 281474976710655L, 72057594037927934L, + 72057594037927935L, Long.MAX_VALUE - 1, Long.MAX_VALUE, Long.MIN_VALUE + 1, + Long.MIN_VALUE, -2L, -1L }; + int lens[] = { 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9, 9, 9, 9 }; + assertEquals("Broken test!", vals.length, lens.length); + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. + */ + for (boolean comp : new boolean[] { true, false }) { + for (int i = 0; i < vals.length; i++) { + byte[] buf = new byte[lens[i] + 1]; + int o = 1; // skip first byte + o = OrderedBytes.putVaruint64(buf, o, vals[i], comp); + assertEquals("Surprising serialized length.", lens[i], o - 1); + assertEquals(buf.length, o); + o = 1; // skip first byte + assertEquals("Length inspection failed.", + lens[i], OrderedBytes.skipVaruint64(buf, o, comp) - 1); + assertEquals("Deserialization failed.", vals[i], OrderedBytes.getVaruint64(buf, o, comp)); + } + } + } + + /** + * Test integer encoding. Example input values come from reference wiki + * page. + */ + @Test + public void testNumericInt() { + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < I_VALS.length; i++) { + byte[] buf1 = new byte[I_LENGTHS[i] + 1]; + int o = OrderedBytes.encodeNumeric(buf1, 1, I_VALS[i], ord); + assertEquals("Encoded value does not match expected length.", buf1.length, o); + long decoded = OrderedBytes.decodeNumeric(buf1, 1).longValue(); + assertEquals( + "Decoded value does not match expected value.", + I_VALS[i].longValue(), decoded); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[I_VALS.length][]; + for (int i = 0; i < I_VALS.length; i++) { + encoded[i] = new byte[I_LENGTHS[i] + 1]; + OrderedBytes.encodeNumeric(encoded[i], 1, I_VALS[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Long[] sortedVals = Arrays.copyOf(I_VALS, I_VALS.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + byte[] buf = encoded[i]; + long decoded = OrderedBytes.decodeNumeric(buf, 1).longValue(); + assertEquals( + String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i].longValue(), decoded); + } + } + } + + /** + * Test real encoding. Example input values come from reference wiki page. + */ + @Test + public void testNumericReal() { + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < D_VALS.length; i++) { + byte[] buf1 = new byte[D_LENGTHS[i] + 1]; + int o = OrderedBytes.encodeNumeric(buf1, 1, D_VALS[i], ord); + assertEquals(buf1.length, o); + double decoded = OrderedBytes.decodeNumeric(buf1, 1).doubleValue(); + assertEquals( + "Decoded value does not match expected value.", + D_VALS[i].doubleValue(), + decoded, MIN_EPSILON); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[D_VALS.length][]; + for (int i = 0; i < D_VALS.length; i++) { + encoded[i] = new byte[D_LENGTHS[i] + 1]; + OrderedBytes.encodeNumeric(encoded[i], 1, D_VALS[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Double[] sortedVals = Arrays.copyOf(D_VALS, D_VALS.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + byte[] buf = encoded[i]; + double decoded = OrderedBytes.decodeNumeric(buf, 1).doubleValue(); + assertEquals( + String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i].doubleValue(), decoded, MIN_EPSILON); + } + } + } + + /** + * Fill gaps in Numeric encoding testing. + */ + @Test + public void testNumericOther() { + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < N_VALS.length; i++) { + byte[] buf1 = new byte[N_LENGTHS[i] + 1]; + int o = OrderedBytes.encodeNumeric(buf1, 1, N_VALS[i], ord); + assertEquals("Encoded value does not match expected length.", buf1.length, o); + Numeric decoded = OrderedBytes.decodeNumeric(buf1, 1); + assertEquals("Decoded value does not match expected value.", N_VALS[i], decoded); + } + } + } + + /** + * Verify Real and Int encodings are compatible, i.e. yield identical bytes for the same value. + */ + @Test + public void testNumericIntRealCompatibility() { + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < I_VALS.length; i++) { + // skip values for which BigDecimal instantiation drops precision + BigDecimal bdi = BigDecimal.valueOf(I_VALS[i]); + if (bdi.compareTo(BigDecimal.valueOf((double) I_VALS[i])) != 0) continue; + + // verify primitives: same value through the long and the double encodeNumeric overloads + byte[] bi = new byte[I_LENGTHS[i]]; + byte[] br = new byte[I_LENGTHS[i]]; + OrderedBytes.encodeNumeric(bi, 0, I_VALS[i], ord); + OrderedBytes.encodeNumeric(br, 0, (double) I_VALS[i], ord); + assertArrayEquals(bi, br); + assertEquals((long) I_VALS[i], OrderedBytes.decodeNumeric(bi, 0).longValue()); + assertEquals((long) I_VALS[i], (long) OrderedBytes.decodeNumeric(br, 0).doubleValue()); + + // verify BigDecimal for Real encoding + br = new byte[I_LENGTHS[i]]; + OrderedBytes.encodeNumeric(br, 0, bdi, ord); + assertArrayEquals(bi, br); + assertEquals(0, + bdi.compareTo(BigDecimal.valueOf(OrderedBytes.decodeNumeric(bi, 0).longValue()))); + } + } + } + + /** + * Test int32 encoding. + */ + @Test + public void testInt32() { + Integer[] vals = + { Integer.MIN_VALUE, Integer.MIN_VALUE / 2, 0, Integer.MAX_VALUE / 2, Integer.MAX_VALUE }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < vals.length; i++) { + byte[] buf1 = new byte[5 + 1]; + int o = OrderedBytes.encodeInt32(buf1, 1, vals[i], ord); + assertEquals("Encoded value does not match expected length.", buf1.length, o); + int decoded = OrderedBytes.decodeInt32(buf1, 1); + assertEquals("Decoded value does not match expected value.", + vals[i].intValue(), decoded); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[5 + 1]; + OrderedBytes.encodeInt32(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Integer[] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + byte[] buf = encoded[i]; + int decoded = OrderedBytes.decodeInt32(buf, 1); + assertEquals( + String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i].intValue(), decoded); + } + } + } + + /** + * Test int64 encoding: round-trip and sort-order checks in both ASCENDING and DESCENDING order. + */ + @Test + public void testInt64() { + Long[] vals = { Long.MIN_VALUE, Long.MIN_VALUE / 2, 0L, Long.MAX_VALUE / 2, Long.MAX_VALUE }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < vals.length; i++) { + byte[] buf1 = new byte[9 + 1]; + int o = OrderedBytes.encodeInt64(buf1, 1, vals[i], ord); + assertEquals("Encoded value does not match expected length.", buf1.length, o); + long decoded = OrderedBytes.decodeInt64(buf1, 1); + assertEquals("Decoded value does not match expected value.", vals[i].longValue(), decoded); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[9 + 1]; + OrderedBytes.encodeInt64(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Long[] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + long decoded = OrderedBytes.decodeInt64(encoded[i], 1); + assertEquals(String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i].longValue(), decoded); + } + } + } + + /** + * Test float32 encoding. NOTE(review): Float.MIN_VALUE is positive; no negative input here. + */ + @Test + public void testFloat32() { + Float[] vals = + { Float.MIN_VALUE, Float.MIN_VALUE + 1.0f, 0.0f, Float.MAX_VALUE / 2.0f, Float.MAX_VALUE }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < vals.length; i++) { + byte[] buf1 = new byte[5 + 1]; + int o = OrderedBytes.encodeFloat32(buf1, 1, vals[i], ord); + assertEquals("Encoded value does not match expected length.", buf1.length, o); + float decoded = OrderedBytes.decodeFloat32(buf1, 1); + assertEquals("Decoded value does not match expected value.", + Float.floatToIntBits(vals[i].floatValue()), + Float.floatToIntBits(decoded)); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[5 + 1]; + OrderedBytes.encodeFloat32(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Float[] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + float decoded = OrderedBytes.decodeFloat32(encoded[i], 1); + assertEquals(String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + Float.floatToIntBits(sortedVals[i].floatValue()), + Float.floatToIntBits(decoded)); + } + } + } + + /** + * Test float64 encoding. NOTE(review): Double.MIN_VALUE is positive; no negative input here. + */ + @Test + public void testFloat64() { + Double[] vals = + { Double.MIN_VALUE, Double.MIN_VALUE + 1.0, 0.0, Double.MAX_VALUE / 2.0, Double.MAX_VALUE }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < vals.length; i++) { + byte[] buf1 = new byte[9 + 1]; + int o = OrderedBytes.encodeFloat64(buf1, 1, vals[i], ord); + assertEquals("Encoded value does not match expected length.", buf1.length, o); + double decoded = OrderedBytes.decodeFloat64(buf1, 1); + assertEquals("Decoded value does not match expected value.", + Double.doubleToLongBits(vals[i].doubleValue()), + Double.doubleToLongBits(decoded)); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[9 + 1]; + OrderedBytes.encodeFloat64(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + Double[] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + double decoded = OrderedBytes.decodeFloat64(encoded[i], 1); + assertEquals(String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + Double.doubleToLongBits(sortedVals[i].doubleValue()), + Double.doubleToLongBits(decoded)); + } + } + } + + /** + * Test string encoding. Buffers are sized for 5 encoded bytes per 3-character value. + */ + @Test + public void testString() { + String[] vals = { "foo", "bar", "baz" }; + int[] expectedLengths = { 5, 5, 5 }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (int i = 0; i < vals.length; i++) { + byte[] buf1 = new byte[expectedLengths[i] + 1]; + OrderedBytes.encodeString(buf1, 1, vals[i], ord); + assertEquals( + "Decoded value does not match expected value.", + vals[i], OrderedBytes.decodeString(buf1, 1)); + } + } + + /* + * assert natural sort order is preserved by the codec. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[expectedLengths[i] + 1]; + OrderedBytes.encodeString(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + String[] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals); + else Arrays.sort(sortedVals, Collections.reverseOrder()); + + for (int i = 0; i < sortedVals.length; i++) { + String decoded = OrderedBytes.decodeString(encoded[i], 1); + assertEquals(String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + sortedVals[i], decoded, ord), + sortedVals[i], decoded); + } + } + } + + @Test(expected = IllegalArgumentException.class) + public void testStringNoNullChars() { + byte[] buff = new byte[3]; + OrderedBytes.encodeString(buff, 0, "\u0000", Order.ASCENDING); + } + + /** + * Test length estimation algorithms for BlobVar encoding. Does not cover + * 0-length input case properly. Each pair is checked in both directions. + */ + @Test + public void testBlobVarLencodedLength() { + int[][] values = { + /* { decoded length, encoded length } pairs, where encoded length = + * ceil((n bytes * 8 bits/input byte) / 7 bits/encoded byte) + 1 header + */ + { 1, 3 }, { 2, 4 }, { 3, 5 }, { 4, 6 }, + { 5, 7 }, { 6, 8 }, { 7, 9 }, { 8, 11 } + }; + + for (int[] pair : values) { + assertEquals(pair[1], OrderedBytes.blobVarEncodedLength(pair[0])); + assertEquals(pair[0], OrderedBytes.blobVarDecodedLength(pair[1])); + } + } + + /** + * Test BlobVar encoding. 
+ */ + @Test + public void testBlobVar() { + byte[][] vals = + { "".getBytes(), "foo".getBytes(), "foobarbazbub".getBytes(), + { (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, + (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa }, + { (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, + (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55 }, + "1".getBytes(), "22".getBytes(), "333".getBytes(), "4444".getBytes(), + "55555".getBytes(), "666666".getBytes(), "7777777".getBytes(), "88888888".getBytes() + }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. Sized by blobVarEncodedLength. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (byte[] val : vals) { + byte[] buf1 = new byte[OrderedBytes.blobVarEncodedLength(val.length) + 1]; + OrderedBytes.encodeBlobVar(buf1, 1, val, ord); + byte[] decoded = OrderedBytes.decodeBlobVar(buf1, 1); + assertArrayEquals("Decoded value does not match expected value.", val, decoded); + } + } + + /* + * assert natural sort order is preserved by the codec. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[OrderedBytes.blobVarEncodedLength(vals[i].length) + 1]; + OrderedBytes.encodeBlobVar(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + byte[][] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals, Bytes.BYTES_COMPARATOR); + else Arrays.sort(sortedVals, Collections.reverseOrder(Bytes.BYTES_COMPARATOR)); + + for (int i = 0; i < sortedVals.length; i++) { + byte[] decoded = OrderedBytes.decodeBlobVar(encoded[i], 1); + assertArrayEquals(String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + Arrays.toString(sortedVals[i]), Arrays.toString(decoded), ord), + sortedVals[i], decoded); + } + } + } + + /** + * Test BlobCopy encoding: round-trip, sort order, and encoding of a byte[] segment. + */ + @Test + public void testBlobCopy() { + byte[][] vals = + { "".getBytes(), "foo".getBytes(), "foobarbazbub".getBytes(), + { (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, + (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa, (byte) 0xaa }, + { (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, + (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55, (byte) 0x55 }, + }; + + /* + * assert encoded values match decoded values. encode into target buffer + * starting at an offset to detect over/underflow conditions. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + for (byte[] val : vals) { + byte[] buf1 = new byte[val.length + (ord == Order.ASCENDING ? 1 : 2) + 1]; + OrderedBytes.encodeBlobCopy(buf1, 1, val, ord); + assertArrayEquals( + "Decoded value does not match expected value.", + val, OrderedBytes.decodeBlobCopy(buf1, 1)); + } + } + + /* + * assert natural sort order is preserved by the codec. 
+ */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[][] encoded = new byte[vals.length][]; + for (int i = 0; i < vals.length; i++) { + encoded[i] = new byte[vals[i].length + (ord == Order.ASCENDING ? 2 : 3)]; + OrderedBytes.encodeBlobCopy(encoded[i], 1, vals[i], ord); + } + + Arrays.sort(encoded, Bytes.BYTES_COMPARATOR); + byte[][] sortedVals = Arrays.copyOf(vals, vals.length); + if (ord == Order.ASCENDING) Arrays.sort(sortedVals, Bytes.BYTES_COMPARATOR); + else Arrays.sort(sortedVals, Collections.reverseOrder(Bytes.BYTES_COMPARATOR)); + + for (int i = 0; i < sortedVals.length; i++) { + byte[] decoded = OrderedBytes.decodeBlobCopy(encoded[i], 1); + assertArrayEquals(String.format( + "Encoded representations do not preserve natural order: <%s>, <%s>, %s", + Arrays.toString(sortedVals[i]), Arrays.toString(decoded), ord), + sortedVals[i], decoded); + } + } + + /* + * assert byte[] segments are serialized correctly. + */ + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[] buf = new byte[3 + (Order.ASCENDING == ord ? 
0 : 1) + 1]; + OrderedBytes.encodeBlobCopy(buf, 0, "foobarbaz".getBytes(), 3, 3, ord); + assertArrayEquals("bar".getBytes(), OrderedBytes.decodeBlobCopy(buf, 0)); + } + } + + /** + * Assert invalid input byte[] are rejected by BlobCopy + */ + @Test(expected = IllegalArgumentException.class) + public void testBlobCopyNoZeroBytes() { + byte[] val = { 0x01, 0x02, 0x00, 0x03 }; + byte[] buf = new byte[val.length + 2]; + OrderedBytes.encodeBlobCopy(buf, 0, val, Order.ASCENDING); + fail("test should never get here."); + } + + /** + * Test generic skip logic: skip() must return the same value as the matching encode call. + */ + @Test + public void testSkip() { + BigDecimal longMax = BigDecimal.valueOf(Long.MAX_VALUE); + double negInf = Double.NEGATIVE_INFINITY; + BigDecimal negLarge = longMax.multiply(longMax).negate(); + BigDecimal negMed = new BigDecimal("-10.0"); + BigDecimal negSmall = new BigDecimal("-0.0010"); + long zero = 0L; + BigDecimal posSmall = negSmall.negate(); + BigDecimal posMed = negMed.negate(); + BigDecimal posLarge = negLarge.negate(); + double posInf = Double.POSITIVE_INFINITY; + double nan = Double.NaN; + int int32 = 100; + long int64 = 100L; + float float32 = 100.0f; + double float64 = 100.0d; + String text = "hello world."; + byte[] blobVar = Bytes.toBytes("foo"); + byte[] blobCopy = Bytes.toBytes("bar"); + + for (Order ord : new Order[] { Order.ASCENDING, Order.DESCENDING }) { + byte[] buff = new byte[30]; + int o; + o = OrderedBytes.encodeNull(buff, 0, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, negInf, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, negLarge, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, negMed, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, negSmall, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, zero, ord); + assertEquals(o, 
OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, posSmall, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, posMed, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, posLarge, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, posInf, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeNumeric(buff, 0, nan, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeInt32(buff, 0, int32, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeInt64(buff, 0, int64, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeFloat32(buff, 0, float32, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeFloat64(buff, 0, float64, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeString(buff, 0, text, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + o = OrderedBytes.encodeBlobVar(buff, 0, blobVar, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + + // blobCopy is special in that it runs to the end of the target buffer, so size it exactly. + buff = new byte[blobCopy.length + (Order.ASCENDING == ord ? 1 : 2)]; + o = OrderedBytes.encodeBlobCopy(buff, 0, blobCopy, ord); + assertEquals(o, OrderedBytes.skip(buff, 0)); + } + } +} -- 1.8.3.2