From b92942542e72ffe3e36ca52008e9f44b1d880eff Mon Sep 17 00:00:00 2001
From: Nick Dimiduk
+ * Each value is encoded as one or more bytes. The first byte of the encoding,
+ * its meaning, and a terse description of the bytes that follow is given by
+ * the following table:
+ * byte[] of variable-length.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedBlob extends OrderedBytesBaseval to buff.
+ */
+ public void encode(ByteRange buff, byte[] val, int length, int offset) {
+ OrderedBytes.encodeBlobCopy(buff, val, offset, length, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedBlobVar.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedBlobVar.java
new file mode 100644
index 0000000..fdfad7c
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedBlobVar.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+/**
+ * An alternative to {@link OrderedBlob} for use by {@link Struct} fields that
+ * do not terminate the fields list.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class OrderedBlobVar extends OrderedBytesBaseval to buff.
+ */
+ public void encode(ByteRange buff, byte[] val, int length, int offset) {
+ OrderedBytes.encodeBlobVar(buff, val, offset, length, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedBytesBase.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedBytesBase.java
new file mode 100644
index 0000000..908c7cc
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedBytesBase.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+/**
+ * Base class for data types backed by the {@link OrderedBytes} encoding
+ * implementations.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public abstract class OrderedBytesBasefloat of 32-bits using a fixed-length encoding.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedFloat32 extends OrderedBytesBasefloat value from the buffer buff.
+ */
+ public float decodeFloat(ByteRange buff) {
+ return OrderedBytes.decodeFloat32(buff);
+ }
+
+ /**
+ * Write instance val into buffer buff.
+ */
+ public void encodeFloat(ByteRange buff, float val) {
+ OrderedBytes.encodeFloat32(buff, val, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedFloat64.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedFloat64.java
new file mode 100644
index 0000000..04f7a90
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedFloat64.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+
+/**
+ * A double of 64-bits using a fixed-length encoding.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedFloat64 extends OrderedBytesBasedouble value from the buffer buff.
+ */
+ public double decodeDouble(ByteRange buff) {
+ return OrderedBytes.decodeFloat64(buff);
+ }
+
+ /**
+ * Write instance val into buffer buff.
+ */
+ public void encodeDouble(ByteRange buff, double val) {
+ OrderedBytes.encodeFloat64(buff, val, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedInt32.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedInt32.java
new file mode 100644
index 0000000..e997fb5
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedInt32.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+
+/**
+ * An int of 32-bits using a fixed-length encoding.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedInt32 extends OrderedBytesBaseint value from the buffer buff.
+ */
+ public int decodeInt(ByteRange buff) {
+ return OrderedBytes.decodeInt32(buff);
+ }
+
+ /**
+ * Write instance val into buffer buff.
+ */
+ public void encodeInt(ByteRange buff, int val) {
+ OrderedBytes.encodeInt32(buff, val, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedInt64.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedInt64.java
new file mode 100644
index 0000000..731fc8c
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedInt64.java
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+
+/**
+ * A long of 64-bits using a fixed-length encoding.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedInt64 extends OrderedBytesBaselong value from the buffer buff.
+ */
+ public long decodeLong(ByteRange buff) {
+ return OrderedBytes.decodeInt64(buff);
+ }
+
+ /**
+ * Write instance val into buffer buff.
+ */
+ public void encodeLong(ByteRange buff, long val) {
+ OrderedBytes.encodeInt64(buff, val, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedNumeric.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedNumeric.java
new file mode 100644
index 0000000..7a301dc
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedNumeric.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.Numeric;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+/**
+ * An {@link Number} of arbitrary precision and variable-length encoding. The
+ * resulting length of encoded values is determined by the numerical (base
+ * 100) precision, not absolute value. Use this data type anywhere you would
+ * expect to use a DECIMAL type, a {@link BigDecimal}, a
+ * {@link BigInteger}, or any time you've parsed floating precision values
+ * from text.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedNumeric extends OrderedBytesBaselong value from the buffer buff.
+ */
+ public long decodeLong(ByteRange buff) {
+ return OrderedBytes.decodeNumeric(buff).longValue();
+ }
+
+ /**
+ * Write instance val into buffer buff.
+ */
+ public void encodeLong(ByteRange buff, long val) {
+ OrderedBytes.encodeNumeric(buff, val, order);
+ }
+
+ /**
+ * Read a double value from the buffer buff.
+ */
+ public double decodeDouble(ByteRange buff) {
+ return OrderedBytes.decodeNumeric(buff).doubleValue();
+ }
+
+ /**
+ * Write instance val into buffer buff.
+ */
+ public void encodeDouble(ByteRange buff, double val) {
+ OrderedBytes.encodeNumeric(buff, val, order);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedString.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedString.java
new file mode 100644
index 0000000..b7a7714
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/types/OrderedString.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.OrderedBytes;
+
+/**
+ * A String of variable-length.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class OrderedString extends OrderedBytesBaseNumeric instance over a double.
+ */
+ public Numeric(double val) {
+ isR = true;
+ r = val;
+ isZ = false;
+ z = 0;
+ bd = null;
+ }
+
+ /**
+ * Create an Numeric instance over a long.
+ */
+ public Numeric(long val) {
+ isZ = true;
+ z = val;
+ isR = false;
+ r = 0.0;
+ bd = null;
+ }
+
+ /**
+ * Create an Numeric instance over a BigDecimal .
+ */
+ public Numeric(BigDecimal val) {
+ if (null == val) throw new NullPointerException();
+
+ // see if this can be a long instead
+ boolean isLong = false;
+ long lng = 0;
+ try {
+ lng = val.longValueExact();
+ isLong = true;
+ } catch (ArithmeticException e) {
+ }
+
+ if (isLong) {
+ isZ = true;
+ z = lng;
+ isR = false;
+ r = 0.0;
+ bd = null;
+ } else {
+ // doesn't fit in a long, fall back to BD
+ bd = val.round(MATH_CONTEXT);
+ isZ = false;
+ isR = false;
+ z = 0;
+ r = 0.0;
+ }
+ }
+
+ /**
+ * Returns true if the Number is an Integer and
+ * fits in a long, false otherwise.
+ */
+ public boolean isInteger() {
+ return isZ;
+ }
+
+ /**
+ * Returns true if the Number is a Real and fits
+ * in a double, false otherwise.
+ */
+ public boolean isReal() {
+ return isR;
+ }
+
+ /**
+ * Returns true if the Number is infinitely large
+ * in magnitude, false otherwise.
+ */
+ public boolean isInfinite() {
+ return isR && Double.isInfinite(r);
+ }
+
+ /**
+ * Returns true if the Number is a Not-a-Number
+ * (NaN) value, false otherwise.
+ */
+ public boolean isNaN() {
+ return isR && Double.isNaN(r);
+ }
+
+ /**
+ * Retrieve the value as a BigDecimal. This will silently
+ * promote a double or long to a
+ * BigDecimal when possible, so use it only if a primitive
+ * value is not available. Check availability using {@link #isInteger()} and
+ * {@link #isReal()}.
+ * @throws NumberFormatException if the
+ * Number is infinite or NaN.
+ */
+ public BigDecimal exactValue() {
+ return null == bd ? isR ? BigDecimal.valueOf(r) : BigDecimal.valueOf(z) : bd;
+ }
+
+ @Override
+ public double doubleValue() {
+ return isReal() ? r : isInteger() ? (double) z : bd.doubleValue();
+ }
+
+ @Override
+ public int intValue() {
+ return isInteger() ? (int) z : isReal() ? (int) r : bd.intValue();
+ }
+
+ @Override
+ public long longValue() {
+ return isInteger() ? z : isReal() ? (long) r : bd.longValue();
+ }
+
+ @Override
+ public float floatValue() {
+ return isReal() ? (float) r : isInteger() ? (float) z : bd.floatValue();
+ }
+
+ @Override
+ public String toString() {
+ return isReal() ? Double.toString(r) : isInteger() ? Long.toString(z) : bd.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (null == o) return false;
+ if (!(o instanceof Numeric)) return false;
+ Numeric that = (Numeric) o;
+ if (this.isInteger() && that.isInteger()) return this.longValue() == that.longValue();
+ if (this.isReal() && that.isReal()) return this.doubleValue() == that.doubleValue();
+ return 0 == this.exactValue().compareTo(that.exactValue());
+ }
+
+ @Override
+ public int hashCode() {
+ if (0 != hashCode) return hashCode;
+ int result = 1;
+ if (isInteger()) {
+ result = result * 23 + (int) (z ^ (z >>> 32));
+ } else if (isReal()) {
+ long bits = Double.doubleToLongBits(r);
+ result = result * 13 + (int) (bits ^ (bits >>> 32));
+ } else {
+ result = result * 17 + bd.hashCode();
+ }
+ hashCode = result;
+ return result;
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
new file mode 100644
index 0000000..0bc2abd
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/OrderedBytes.java
@@ -0,0 +1,1318 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.apache.hadoop.hbase.types.Order.ASCENDING;
+import static org.apache.hadoop.hbase.types.Order.DESCENDING;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Comparator;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.types.Order;
+
+import com.google.common.annotations.VisibleForTesting;
+
+/**
+ * Utility class that handles ordered byte arrays. That is, unlike
+ * {@link Bytes}, these methods produce byte arrays which maintain the sort
+ * order of the original values.
+ * Encoding Format summary
+ *
+ *
+ *
+ * Content Type Encoding
+ * NULL 0x05
+ * negative infinity 0x07
+ * negative large 0x08, ~E, ~M
+ * negative medium 0x13-E, ~M
+ * negative small 0x14, -E, ~M
+ * zero 0x15
+ * positive small 0x16, ~-E, M
+ * positive medium 0x17+E, M
+ * positive large 0x22, E, M
+ * positive infinity 0x23
+ * NaN 0x25
+ * fixed-length 32-bit integer 0x27, I
+ * fixed-length 64-bit integer 0x28, I
+ * fixed-length 32-bit float 0x30, F
+ * fixed-length 64-bit float 0x31, F
+ * text 0x33, T
+ * variable binary 0x35, B
+ * copy binary 0x36, X
+ * Each value that is a NULL encodes as a single byte of 0x05. Since every + * other value encoding begins with a byte greater than 0x05, this forces NULL + * values to sort first. + *
+ *+ * Each text value begins with a single byte of 0x33 and ends with a single + * byte of 0x00. There are zero or more intervening bytes that encode the text + * value. The intervening bytes are chosen so that the encoding will sort in + * the desired collating order. The intervening bytes may not contain a 0x00 + * character; the only 0x00 byte allowed in a text encoding is the final byte. + *
+ *+ * The text encoding ends in 0x00 in order to ensure that when there are two + * strings where one is a prefix of the other that the shorter string will + * sort first. + *
+ *+ * There are two encoding strategies for binary fields, referred to as + * "BlobVar" and "BlobCopy". BlobVar is less efficient in both space and + * encoding time. It has no limitations on the range of encoded values. + * BlobCopy is a byte-for-byte copy of the input data followed by a + * termination byte. It is extremely fast to encode and decode. It carries the + * restriction of not allowing a 0x00 value in the input byte[] as this value + * is used as the termination byte. + *
+ *
+ * "BlobVar" encodes the input byte[] in a manner similar to a variable length
+ * integer encoding. As with the other OrderedBytes encodings,
+ * the first encoded byte is used to indicate what kind of value follows. This
+ * header byte is 0x35 for BlobVar encoded values. As with the traditional
+ * varint encoding, the most significant bit of each subsequent encoded
+ * byte is used as a continuation marker. The 7 remaining bits
+ * contain the 7 most significant bits of the first unencoded byte. The next
+ * encoded byte starts with a continuation marker in the MSB. The least
+ * significant bit from the first unencoded byte follows, and the remaining 6
+ * bits contain the 6 MSBs of the second unencoded byte. The encoding
+ * continues, encoding 7 bytes on to 8 encoded bytes. The MSB of the final
+ * encoded byte contains a termination marker rather than a continuation
+ * marker, and any remaining bits from the final input byte. Any trailing bits
+ * in the final encoded byte are zeros.
+ *
+ * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x36 + * as the header byte, and is terminated by 0x00. This alternative encoding is + * more efficient, but it cannot accept values containing a 0x00 byte. + *
+ *+ * Numeric values must be coded so as to sort in numeric order. We assume that + * numeric values can be both integer and floating point values. The wrapper + * class {@link Numeric} is used to smooth over values decoded using this + * scheme. + *
+ *+ * Simplest cases first: If the numeric value is a NaN, then the encoding is a + * single byte of 0x25. This causes NaN values to sort after to every other + * numeric value. + *
+ *+ * If the numeric value is a negative infinity then the encoding is a single + * byte of 0x07. Since every other numeric value except NaN has a larger + * initial byte, this encoding ensures that negative infinity will sort prior + * to every other numeric value other than NaN. + *
+ *+ * If the numeric value is a positive infinity then the encoding is a single + * byte of 0x23. Every other numeric value encoding begins with a smaller + * byte, ensuring that positive infinity always sorts last among numeric + * values. 0x23 is also smaller than 0x33, the initial byte of a text value, + * ensuring that every numeric value sorts before every text value. + *
+ *+ * If the numeric value is exactly zero then it is encoded as a single byte of + * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14 + * and finite positive values will have initial bytes of 0x16 through 0x22. + *
+ *+ * For all numeric values, we compute a mantissa M and an exponent E. The + * mantissa is a base-100 representation of the value. The exponent E + * determines where to put the decimal point. + *
+ *+ * Each centimal digit of the mantissa is stored in a byte. If the value of + * the centimal digit is X (hence X≥0 and X≤99) then the byte value will + * be 2*X+1 for every byte of the mantissa, except for the last byte which + * will be 2*X+0. The mantissa must be the minimum number of bytes necessary + * to represent the value; trailing X==0 digits are omitted. This means that + * the mantissa will never contain a byte with the value 0x00. + *
+ *+ * If we assume all digits of the mantissa occur to the right of the decimal + * point, then the exponent E is the power of one hundred by which one must + * multiply the mantissa to recover the original value. + *
+ *+ * Values are classified as large, medium, or small according to the value of + * E. If E is 11 or more, the value is large. For E between 0 and 10, the + * value is medium. For E less than zero, the value is small. + *
+ *+ * Large positive values are encoded as a single byte 0x22 followed by E as a + * varint and then M. Medium positive values are a single byte of 0x17+E + * followed by M. Small positive values are encoded as a single byte 0x16 + * followed by the ones-complement of the varint for -E followed by M. + *
+ *+ * Small negative values are encoded as a single byte 0x14 followed by -E as a + * varint and then the ones-complement of M. Medium negative values are + * encoded as a byte 0x13-E followed by the ones-complement of M. Large + * negative values consist of the single byte 0x08 followed by the + * ones-complement of the varint encoding of E followed by the ones-complement + * of M. + *
+ *+ * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte + * format. All 8-byte integers are serialized to the equivelant 9-byte format. + * Serialization is performed by writing a header byte, inverting the integer + * sign bit and writing the resulting bytes to the byte array in big endian + * order. + *
+ *+ * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte + * encoding format, respectively. The format is identical, save for the + * precision respected in each step of the operation. + *
+ * This format ensures the following total ordering of floating point values: + * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... < + * -Float.MIN_VALUE < -0.0 < +0.0; < Float.MIN_VALUE < ... < + * Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN + *
+ *+ * Floating point numbers are encoded as specified in IEEE 754. A 32-bit + * single precision float consists of a sign bit, 8-bit unsigned exponent + * encoded in offset-127 notation, and a 23-bit significand. The format is + * described further in the Single Precision + * Floating Point Wikipedia page + *
+ *+ * The value of a normal float is -1 sign bit × + * 2exponent - 127 × 1.significand + *
+ *+ * The IEE754 floating point format already preserves sort ordering for + * positive floating point numbers when the raw bytes are compared in most + * significant byte order. This is discussed further at http://www.cygnus-software.com/papers/comparingfloats/comparingfloats. + * htm + *
+ *+ * Thus, we need only ensure that negative numbers sort in the the exact + * opposite order as positive numbers (so that say, negative infinity is less + * than negative 1), and that all negative numbers compare less than any + * positive number. To accomplish this, we invert the sign bit of all floating + * point numbers, and we also invert the exponent and significand bits if the + * floating point number was negative. + *
+ *
+ * More specifically, we first store the floating point bits into a 32-bit int
+ * j using {@link Float#floatToIntBits}. This method collapses
+ * all NaNs into a single, canonical NaN value but otherwise leaves the bits
+ * unchanged. We then compute
+ *
+ * j ˆ= (j >> (Integer.SIZE - 1)) | Integer.MIN_SIZE + *+ *
+ * which inverts the sign bit and XOR's all other bits with the sign bit
+ * itself. Comparing the raw bytes of j in most significant byte
+ * order is equivalent to performing a single precision floating point
+ * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't
+ * compare equal to anything when performing floating point comparisons).
+ *
+ * The resulting integer is then converted into a byte array by serializing + * the integer one byte at a time in most significant byte order. The + * serialized integer is prefixed by a single header byte. All serialized + * values are 5 bytes in length. + *
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class OrderedBytes { + + /* + * The following constant values are used by encoding implementations + */ + + public static final byte TERM = 0x00; + public static final byte NULL = 0x05; + public static final byte NEG_INF = 0x07; + public static final byte NEG_LARGE = 0x08; + public static final byte NEG_MED_MIN = 0x09; + public static final byte NEG_MED_MAX = 0x13; + public static final byte NEG_SMALL = 0x14; + public static final byte ZERO = 0x15; + public static final byte POS_SMALL = 0x16; + public static final byte POS_MED_MIN = 0x17; + public static final byte POS_MED_MAX = 0x21; + public static final byte POS_LARGE = 0x22; + public static final byte POS_INF = 0x23; + public static final byte NAN = 0x25; + public static final byte FIXED_INT32 = 0x27; + public static final byte FIXED_INT64 = 0x28; + public static final byte FIXED_FLOAT32 = 0x30; + public static final byte FIXED_FLOAT64 = 0x31; + public static final byte TEXT = 0x33; + public static final byte BLOB_VAR = 0x35; + public static final byte BLOB_COPY = 0x36; + + public static final Charset UTF8 = Charset.forName("UTF-8"); + + // constants used for numeric {en,de}coding + private static final BigDecimal NEG_ONE = BigDecimal.ONE.negate(); + private static final BigDecimal E8 = BigDecimal.valueOf(1e8); + private static final BigDecimal E32 = BigDecimal.valueOf(1e32); + private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2); + private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10); + + /** + * Perform unsigned comparison between two long values. Conforms to the same + * interface as {@link Comparator#compare(Object, Object)}. + */ + private static int unsignedCmp(long x1, long x2) { + int cmp; + if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0; + // invert the result when either value is negative + if ((x1 < 0) != (x2 < 0)) return -cmp; + return cmp; + } + + /** + * Write a 32-bit unsigned integer todst as 4 big-endian
+ * bytes.
+ * @return number of bytes written.
+ */
+ private static int putUint32(ByteRange dst, int val) {
+ dst.put((byte) (val >>> 24))
+ .put((byte) (val >>> 16))
+ .put((byte) (val >>> 8))
+ .put((byte) val);
+ return 4;
+ }
+
+ /**
+ * Encode an unsigned 64-bit integer val into dst.
+ * Compliment the encoded value when comp is true.
+ */
+ @VisibleForTesting
+ static int putVaruint64(ByteRange dst, long val, boolean comp) {
+ int w, y, start = dst.getPosition();
+ byte[] a = dst.getBytes();
+ Order ord = comp ? DESCENDING : ASCENDING;
+ if (-1 == unsignedCmp(val, 241L)) {
+ dst.put((byte) val);
+ ord.apply(a, start, 1);
+ return 1;
+ }
+ if (-1 == unsignedCmp(val, 2288L)) {
+ y = (int) (val - 240);
+ dst.put((byte) (y / 256 + 241))
+ .put((byte) (y % 256));
+ ord.apply(a, start, 2);
+ return 2;
+ }
+ if (-1 == unsignedCmp(val, 67824L)) {
+ y = (int) (val - 2288);
+ dst.put((byte) 249)
+ .put((byte) (y / 256))
+ .put((byte) (y % 256));
+ ord.apply(a, start, 3);
+ return 3;
+ }
+ y = (int) (val & 0xffffffff);
+ w = (int) (val >>> 32);
+ if (w == 0) {
+ if (-1 == unsignedCmp(y, 16777216L)) {
+ dst.put((byte) 250)
+ .put((byte) (y >>> 16))
+ .put((byte) (y >>> 8))
+ .put((byte) y);
+ ord.apply(a, start, 4);
+ return 4;
+ }
+ dst.put((byte) 251);
+ putUint32(dst, y);
+ ord.apply(a, start, 5);
+ return 5;
+ }
+ if (-1 == unsignedCmp(w, 256L)) {
+ dst.put((byte) 252)
+ .put((byte) w);
+ putUint32(dst, y);
+ ord.apply(a, start, 6);
+ return 6;
+ }
+ if (-1 == unsignedCmp(w, 65536L)) {
+ dst.put((byte) 253)
+ .put((byte) (w >>> 8))
+ .put((byte) w);
+ putUint32(dst, y);
+ ord.apply(a, start, 7);
+ return 7;
+ }
+ if (-1 == unsignedCmp(w, 16777216L)) {
+ dst.put((byte) 254)
+ .put((byte) (w >>> 16))
+ .put((byte) (w >>> 8))
+ .put((byte) w);
+ putUint32(dst, y);
+ ord.apply(a, start, 8);
+ return 8;
+ }
+ dst.put((byte) 255);
+ putUint32(dst, w);
+ putUint32(dst, y);
+ ord.apply(a, start, 9);
+ return 9;
+ }
+
+ /**
+ * Inspect an encoded varu64 for it's encoded length. Does not modify
+ * src's state.
+ * @param src source buffer
+ * @param comp if true, parse the compliment of the value.
+ * @return number of bytes consumed by this value
+ */
+ @VisibleForTesting
+ static int lengthVaru64(ByteRange src, boolean comp) {
+ byte[] a = src.getBytes();
+ int i = src.getPosition();
+ int a0 = (comp ? DESCENDING : ASCENDING).apply(a[i]) & 0xff;
+ if (a0 <= 240) return 1;
+ if (a0 >= 241 && a0 <= 248) return 2;
+ if (a0 == 249) return 3;
+ if (a0 == 250) return 4;
+ if (a0 == 251) return 5;
+ if (a0 == 252) return 6;
+ if (a0 == 253) return 7;
+ if (a0 == 254) return 8;
+ if (a0 == 255) return 9;
+ throw new IllegalArgumentException("unexpected value in first byte: 0x"
+ + Long.toHexString(a[i]));
+ }
+
+ /**
+ * Decode a sequence of bytes in buff as an unsigned 64-bit
+ * integer. Compliment the encoded value when comp is true.
+ */
+ @VisibleForTesting
+ static long getVaruint64(ByteRange buff, boolean comp) {
+ assert buff.getRemaining() >= lengthVaru64(buff, comp);
+ long ret;
+ Order ord = comp ? DESCENDING : ASCENDING;
+ byte x = buff.get();
+ int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8;
+ if (-1 == unsignedCmp(a0, 241)) {
+ return a0;
+ }
+ x = buff.get();
+ a1 = ord.apply(x) & 0xff;
+ if (-1 == unsignedCmp(a0, 249)) {
+ return (a0 - 241) * 256 + a1 + 240;
+ }
+ x = buff.get();
+ a2 = ord.apply(x) & 0xff;
+ if (a0 == 249) {
+ return 2288 + 256 * a1 + a2;
+ }
+ x = buff.get();
+ a3 = ord.apply(x) & 0xff;
+ if (a0 == 250) {
+ return (a1 << 16) | (a2 << 8) | a3;
+ }
+ x = buff.get();
+ a4 = ord.apply(x) & 0xff;
+ ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4;
+ if (a0 == 251) {
+ return ret;
+ }
+ x = buff.get();
+ a5 = ord.apply(x) & 0xff;
+ if (a0 == 252) {
+ return (ret << 8) | a5;
+ }
+ x = buff.get();
+ a6 = ord.apply(x) & 0xff;
+ if (a0 == 253) {
+ return (ret << 16) | (a5 << 8) | a6;
+ }
+ x = buff.get();
+ a7 = ord.apply(x) & 0xff;
+ if (a0 == 254) {
+ return (ret << 24) | (a5 << 16) | (a6 << 8) | a7;
+ }
+ x = buff.get();
+ a8 = ord.apply(x) & 0xff;
+ return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8;
+ }
+
+ /**
+ * Skip buff over the encoded bytes.
+ */
+ static void skipVaruint64(ByteRange buff, boolean comp) {
+ buff.setPosition(buff.getPosition() + lengthVaru64(buff, comp));
+ }
+
+ /**
+ * Read significand digits from buff according to the magnitude
+ * of e.
+ * @param buff The source from which to read encoded digits.
+ * @param e The magnitude of the first digit read.
+ * @param comp Treat encoded bytes as compliments when comp is true.
+ * @return The decoded value.
+ */
+ private static BigDecimal decodeSignificand(ByteRange buff, int e, boolean comp) {
+ // TODO: can this be made faster?
+ byte[] a = buff.getBytes();
+ BigDecimal m = BigDecimal.ZERO;
+ e--;
+ for (int i = buff.getPosition();; i++) {
+ // base-100 digits are encoded as val * 2 + 1 except for the termination digit.
+ m = m.add( // m +=
+ new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit]
+ BigDecimal.valueOf(((comp ? DESCENDING : ASCENDING).apply(a[i]) & 0xff) / 2)));
+ e--;
+ // detect termination digit
+ if (((comp ? DESCENDING : ASCENDING).apply(a[i]) & 1) == 0) {
+ buff.setPosition(i + 1);
+ break;
+ }
+ }
+ return m;
+ }
+
+ /**
+ * Skip buff over the significand bytes.
+ */
+ private static void skipSignificand(ByteRange buff, boolean comp) {
+ byte[] a = buff.getBytes();
+ for (int i = buff.getPosition();; i++) {
+ if (((comp ? DESCENDING : ASCENDING).apply(a[i]) & 1) == 0) {
+ buff.setPosition(i + 1);
+ break;
+ }
+ }
+ }
+
+ /**
+ * Encode the small positive floating point number val using
+ * the key encoding. The caller guarantees that val will be
+ * less than 1.0 and greater than 0.0.
+ *
+ * A floating point value is encoded as an integer exponent E
+ * and a mantissa M. The original value is equal to
+ * (M * 100^E). E is set to the smallest value
+ * possible without making M greater than or equal to 1.0.
+ *
+ * For this routine, E will always be zero or negative, since
+ * the original value is less than one. The encoding written by this routine
+ * is the ones-complement of the varint of the negative of E
+ * followed by the mantissa:
+ *
+ *
+ * Encoding: ~-E M + *+ * + * + * @param buff The destination to which encoded digits are written. + * @param val The value to encode. + * @param ecomp Write the compliment of
e to buff
+ * when ecomp is true.
+ * @param mcomp Write the compliment of M to buff
+ * when mcomp is true.
+ */
+ private static void encodeNumericSmall(ByteRange buff, BigDecimal val, boolean ecomp,
+ boolean mcomp) {
+ // TODO: can this be done faster?
+ // assert 0.0 < dec < 1.0
+ assert BigDecimal.ZERO.compareTo(val) < 0 && BigDecimal.ONE.compareTo(val) > 0;
+ int e = 0, d, startM;
+ Order ord = mcomp ? DESCENDING : ASCENDING;
+ while (val.compareTo(EN10) < 0) { val = val.movePointRight(8); e += 4; }
+ while (val.compareTo(EN2) < 0) { val = val.movePointRight(2); e++; }
+ putVaruint64(buff, e, ecomp);
+ startM = buff.getPosition();
+ for (int i = 0; i < 18 && val.compareTo(BigDecimal.ZERO) != 0; i++) {
+ val = val.movePointRight(2);
+ d = val.intValue();
+ buff.put((byte) ((2 * d + 1) & 0xff));
+ val = val.subtract(BigDecimal.valueOf(d));
+ }
+ buff.getBytes()[buff.getPosition() - 1] &= 0xfe;
+ ord.apply(buff.getBytes(), startM, buff.getPosition() - startM);
+ }
+
+ /**
+ * Encode the large positive floating point number val using
+ * the key encoding. The caller guarantees that val will be
+ * finite and greater than or equal to 1.0.
+ *
+ * A floating point value is encoded as an integer exponent E
+ * and a mantissa M. The original value is equal to
+ * (M * 100^E). E is set to the smallest value
+ * possible without making M greater than or equal to 1.0.
+ *
+ * Each centimal digit of the mantissa is stored in a byte. If the value of
+ * the centimal digit is X (hence X>=0 and
+ * X<=99) then the byte value will be 2*X+1 for
+ * every byte of the mantissa, except for the last byte which will be
+ * 2*X+0. The mantissa must be the minimum number of bytes
+ * necessary to represent the value; trailing X==0 digits are
+ * omitted. This means that the mantissa will never contain a byte with the
+ * value 0x00.
+ *
+ * If E > 10, then this routine writes of E as a
+ * varint followed by the mantissa as described above. Otherwise, if
+ * E <= 10, this routine only writes the mantissa and leaves
+ * the E value to be encoded as part of the opening byte of the
+ * field by the calling function.
+ *
+ *
+ * Encoding: M (if E<=10) + * E M (if E>10) + *+ * + *
+ * This routine returns the value of E.
+ *
e to buff
+ * when ecomp is true.
+ * @param mcomp Write the compliment of M to buff
+ * when mcomp is true.
+ * @return E(xponent) in base-100.
+ */
+ private static int encodeNumericLarge(ByteRange buff, BigDecimal val, boolean ecomp,
+ boolean mcomp) {
+ // TODO: can this be done faster?
+ // assert val >= 0.0
+ assert BigDecimal.ONE.compareTo(val) <= 0;
+ int e = 0, d, startM;
+ Order ord = mcomp ? DESCENDING : ASCENDING;
+ while (val.compareTo(E32) >= 0 && e <= 350) { val = val.movePointLeft(32); e +=16; }
+ while (val.compareTo(E8) >= 0 && e <= 350) { val = val.movePointLeft(8); e+= 4; }
+ while (val.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { val = val.movePointLeft(2); e++; }
+ if (e > 10) putVaruint64(buff, e, ecomp);
+ startM = buff.getPosition();
+ for (int i = 0; i < 18 && val.compareTo(BigDecimal.ZERO) != 0; i++) {
+ val = val.movePointRight(2);
+ d = val.intValue();
+ buff.put((byte) (2 * d + 1));
+ val = val.subtract(BigDecimal.valueOf(d));
+ }
+ buff.getBytes()[buff.getPosition() - 1] &= 0xfe;
+ ord.apply(buff.getBytes(), startM, buff.getPosition() - startM);
+ return e;
+ }
+
+ /**
+ * Encode a numerical value using the variable-length encoding.
+ */
+ public static void encodeNumeric(ByteRange buff, long val, Order ord) {
+ int e, i, start = buff.getPosition();
+ if (val == 0) {
+ buff.put(ZERO);
+ } else if (val <= -1) {
+ i = buff.getPosition();
+ buff.put(NEG_LARGE); /* Large negative number: 0x08, ~E, ~M */
+ e = encodeNumericLarge(buff, BigDecimal.valueOf(val).negate(), true, true);
+ if (e <= 10) buff.put(i, (byte) (NEG_MED_MAX - e)); /* Medium negative number: 0x13-E, ~M */
+ } else {
+ i = buff.getPosition();
+ buff.put(POS_LARGE); /* Large positive number: 0x22, E, M */
+ e = encodeNumericLarge(buff, BigDecimal.valueOf(val), false, false);
+ if (e <= 10) buff.put(i, (byte) (POS_MED_MIN + e)); /* Medium positive number: 0x17+E, M */
+ }
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ }
+
+ /**
+ * Encode a numerical value using the variable-length encoding.
+ */
+ public static void encodeNumeric(ByteRange buff, double val, Order ord) {
+ int start = buff.getPosition();
+ if (Double.isNaN(val)) {
+ buff.put(NAN);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ } else if (val == Double.NEGATIVE_INFINITY) {
+ buff.put(NEG_INF);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ } else if (val == Double.POSITIVE_INFINITY) {
+ buff.put(POS_INF);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ } else if (val == 0.0) {
+ buff.put(ZERO);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ } else {
+ encodeNumeric(buff, BigDecimal.valueOf(val), ord);
+ }
+ }
+
+ /**
+ * Encode a numerical value using the variable-length encoding.
+ */
+ public static void encodeNumeric(ByteRange buff, BigDecimal val, Order ord) {
+ int e, i, start = buff.getPosition();
+ if (null == val) {
+ encodeNull(buff, ord);
+ return;
+ } else if (BigDecimal.ZERO.compareTo(val) == 0) {
+ buff.put(ZERO);
+ } else if (NEG_ONE.compareTo(val) >= 0) { // v <= -1.0
+ i = buff.getPosition();
+ buff.put(NEG_LARGE); /* Large negative number: 0x08, ~E, ~M */
+ e = encodeNumericLarge(buff, val.negate(), true, true);
+ if (e <= 10) buff.put(i, (byte) (NEG_MED_MAX - e)); /* Medium negative number: 0x13-E, ~M */
+ } else if (BigDecimal.ZERO.compareTo(val) > 0) { // v < 0.0
+ buff.put(NEG_SMALL); /* Small negative number: 0x14, -E, ~M */
+ encodeNumericSmall(buff, val.negate(), false, true);
+ } else if (BigDecimal.ONE.compareTo(val) > 0) { // v < 1.0
+ buff.put(POS_SMALL); /* Small positive number: 0x16, ~-E, M */
+ encodeNumericSmall(buff, val, true, false);
+ } else {
+ i = buff.getPosition();
+ buff.put(POS_LARGE); /* Large positive number: 0x22, E, M */
+ e = encodeNumericLarge(buff, val, false, false);
+ if (e <= 10) buff.put(i, (byte) (POS_MED_MIN + e)); /* Medium positive number: 0x17+E, M */
+ }
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ }
+
+ /**
+ * Encode a numerical value using the variable-length encoding.
+ */
+ public static void encodeNumeric(ByteRange buff, Numeric val, Order ord) {
+ if (null == val) {
+ encodeNull(buff, ord);
+ } else if (val.isInteger()) {
+ encodeNumeric(buff, val.longValue(), ord);
+ } else if (val.isReal()) {
+ encodeNumeric(buff, val.doubleValue(), ord);
+ } else {
+ encodeNumeric(buff, val.exactValue(), ord);
+ }
+ }
+
+ /**
+ * Decode a Numerical value from the variable-length encoding. The backing
+ * array is not modified through use of this method.
+ */
+ public static Numeric decodeNumeric(ByteRange buff) {
+ byte header = buff.get();
+ if (header == NULL || header == DESCENDING.apply(NULL))
+ return null;
+ int e = 0;
+ boolean dsc = (-1 == Integer.signum(header));
+ if (dsc) header = DESCENDING.apply(header);
+
+ if (header == NAN) {
+ return Numeric.NaN;
+ } else if (header == NEG_INF) {
+ return Numeric.NEGATIVE_INFINITY;
+ } else if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */
+ e = (int) getVaruint64(buff, !dsc);
+ return new Numeric(decodeSignificand(buff, e, !dsc).negate());
+ } else if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) {
+ /* Medium negative number: 0x13-E, ~M */
+ e = NEG_MED_MAX - header;
+ return new Numeric(decodeSignificand(buff, e, !dsc).negate());
+ } else if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */
+ e = (int) -getVaruint64(buff, dsc);
+ return new Numeric(decodeSignificand(buff, e, !dsc).negate());
+ } else if (header == ZERO) {
+ return Numeric.ZERO;
+ } else if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */
+ e = (int) -getVaruint64(buff, !dsc);
+ return new Numeric(decodeSignificand(buff, e, dsc));
+ } else if (header >= POS_MED_MIN && header <= POS_MED_MAX) {
+ /* Medium positive number: 0x17+E, M */
+ e = header - POS_MED_MIN;
+ return new Numeric(decodeSignificand(buff, e, dsc));
+ } else if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */
+ e = (int) getVaruint64(buff, dsc);
+ return new Numeric(decodeSignificand(buff, e, dsc));
+ } else if (header == POS_INF) {
+ return Numeric.POSITIVE_INFINITY;
+ } else {
+ throw new IllegalArgumentException("unexpected value in first byte: 0x"
+ + Long.toHexString(header));
+ }
+ }
+
+ /**
+ * Encode a String value.
+ */
+ public static void encodeString(ByteRange buff, String val, Order ord) {
+ if (null == val) {
+ encodeNull(buff, ord);
+ return;
+ }
+ if (val.contains("\u0000"))
+ throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'");
+ int start = buff.getPosition();
+ buff.put(TEXT);
+ buff.put(val.getBytes(UTF8));
+ buff.put(TERM);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ }
+
+ /**
+ * Decode a String value. The backing array is not modified through use of
+ * this method.
+ */
+ public static String decodeString(ByteRange buff) {
+ byte header = buff.get();
+ if (header == NULL || header == DESCENDING.apply(NULL))
+ return null;
+ assert header == TEXT || header == DESCENDING.apply(TEXT);
+ Order ord = header == TEXT ? ASCENDING : DESCENDING;
+ byte[] a = buff.getBytes();
+ int start = buff.getPosition(), i = start;
+ byte terminator = ord.apply(TERM);
+ while (a[i] != terminator) i++;
+ buff.setPosition(++i);
+ if (DESCENDING == ord) {
+ byte[] copy = Arrays.copyOfRange(a, start, i - 1);
+ ord.apply(copy);
+ return new String(copy, UTF8);
+ } else {
+ return new String(a, start, i - start - 1, UTF8);
+ }
+ }
+
+ /**
+ * Calculate the expected BlobVar encoded length based on unencoded length.
+ */
+ public static int blobVarEncodedLength(int len) {
+ if (0 == len)
+ return 2; // 1-byte header + 1-byte terminator
+ else
+ return (int)
+ Math.ceil(
+ (len * 8) // 8-bits per input byte
+ / 7.0) // 7-bits of input data per encoded byte, rounded up
+ + 1; // + 1-byte header
+ }
+
+ /**
+ * Calculate the expected BlobVar decoded length based on encoded length.
+ */
+ @VisibleForTesting
+ static int blobVarDecodedLength(int len) {
+ return
+ ((len
+ - 1) // 1-byte header
+ * 7) // 7-bits of payload per encoded byte
+ / 8; // 8-bits per byte
+ }
+
+ /**
+ * Encode a Blob value using a modified varint encoding scheme.
+ * + * This format encodes a byte[] value such that no limitations on the input + * value are imposed. The first byte encodes the encoding scheme that + * follows, 0x35. Each encoded byte thereafter has a header bit indicating + * whether there is another encoded byte following. A header bit of '1' + * indicates continuation of the encoding. A header bit of '0' indicates + * this byte encodes the final byte. An empty input value is a special case, + * wherein a NULL byte is used as a termination byte. The remaining 7 bits + * on each encoded byte carry the value payload. + *
+ */ + public static void encodeBlobVar(ByteRange buff, byte[] val, int offset, int len, Order ord) { + if (null == val) { + encodeNull(buff, ord); + return; + } + // Empty value is null-terminated. All other values are encoded as 7-bits per byte. + assert buff.getRemaining() >= blobVarEncodedLength(len) : "buffer overflow expected."; + int start = buff.getPosition(); + buff.put(BLOB_VAR); + if (0 == len) { + buff.put(TERM); + } else { + byte s = 1, t = 0; + for (int i = offset; i < len; i++) { + buff.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s))); + if (s < 7) { + t = (byte) (val[i] << (7 - s)); + s++; + } else { + buff.put((byte) (0x80 | val[i])); + s = 1; + t = 0; + } + } + if (s > 1) { + buff.put((byte) (0x7f & t)); + } else { + buff.getBytes()[buff.getPosition() - 1] &= 0x7f; + } + } + ord.apply(buff.getBytes(), start, buff.getPosition() - start); + } + + /** + * Encode a blob value using a modified varint encoding scheme. + */ + public static void encodeBlobVar(ByteRange buff, byte[] val, Order ord) { + encodeBlobVar(buff, val, 0, null != val ? val.length : 0, ord); + } + + /** + * Encode a blob value using a modified varint encoding scheme. + */ + public static void encodeBlobVar(ByteRange buff, ByteRange val, Order ord) { + if (null == val) { + encodeBlobVar(buff, (byte[]) null, ord); + } else { + encodeBlobVar(buff, val.getBytes(), val.getOffset() + val.getPosition(), val.getLength(), + ord); + } + } + + /** + * Decode a blob value that was encoded using BlobVar encoding. The backing array is not modified + * through use of this method. + */ + public static byte[] decodeBlobVar(ByteRange buff) { + byte header = buff.get(); + if (header == NULL || header == DESCENDING.apply(NULL)) + return null; + assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR); + Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING; + byte[] a = buff.getBytes(); + int start = buff.getPosition(), end; + if (a[start] == ord.apply(TERM)) { + // skip empty input buffer. + buff.get(); + return new byte[0]; + } + for (end = start; (byte) (ord.apply(a[end]) & 0x80) != TERM; end++) ; + end++; // increment end to 1-past last byte + // create ret buffer using length of encoded data + 1 (header byte) + ByteRange ret = new ByteRange(blobVarDecodedLength(end - start + 1)); + int s = 6; + byte t = (byte) ((ord.apply(a[start]) << 1) & 0xff); + for (int i = start + 1; i < end; i++) { + if (s == 7) { + ret.put((byte) (t | (ord.apply(a[i]) & 0x7f))); + i++; + } else { + ret.put((byte) (t | ((ord.apply(a[i]) & 0x7f) >>> s))); + } + if (i == end) break; + t = (byte) ((ord.apply(a[i]) << 8 - s) & 0xff); + s = s == 1 ? 7 : s - 1; + } + buff.setPosition(end); + assert t == 0 : "Unexpected bits remaining after decoding blob."; + return ret.getBytes(); + } + + /** + * Encode a Blob value as a byte-for-byte copy. + */ + public static void encodeBlobCopy(ByteRange buff, byte[] val, int offset, int len, Order ord) { + if (null == val) { + encodeNull(buff, ord); + if (DESCENDING == ord) { + // DESCENDING ordered BlobCopy requires a termination bit to preserve + // sort-order semantics of null values. + buff.put(ord.apply(TERM)); + } + return; + } + // Blobs as final entry in a compound key are written unencoded. + int overhead = ASCENDING == ord ? 1 : 2; + assert buff.getRemaining() >= len + overhead; + for (int i = offset; i < offset + len; i++) { + if (val[i] == 0x00) + throw new IllegalArgumentException("0x00 bytes not permitted in value."); + } + int start = buff.getPosition(); + buff.put(BLOB_COPY); + buff.put(val, offset, len); + // DESCENDING ordered BlobCopy requires a termination bit to preserve + // sort-order semantics of null values. + if (DESCENDING == ord) buff.put(TERM); + ord.apply(buff.getBytes(), start, buff.getPosition() - start); + } + + /** + * Encode a Blob value as a byte-for-byte copy. + */ + public static void encodeBlobCopy(ByteRange buff, byte[] val, Order ord) { + encodeBlobCopy(buff, val, 0, null != val ? val.length : 0, ord); + } + + /** + * Encode a Blob value as a byte-for-byte copy. + */ + public static void encodeBlobCopy(ByteRange buff, ByteRange val, Order ord) { + if (null == val) { + encodeBlobCopy(buff, (byte[]) null, ord); + } else { + encodeBlobCopy( + buff, val.getBytes(), val.getOffset() + val.getPosition(), val.getLength(), ord); + } + } + + /** + * Decode a Blob value, byte-for-byte copy. The backing array is not + * modified through use of this method. + */ + public static byte[] decodeBlobCopy(ByteRange buff) { + byte header = buff.get(); + if (header == NULL) { + return null; + } else if (header == DESCENDING.apply(NULL)) { + buff.get(); // read DESCENDING order termination bit. + return null; + } + + assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY); + Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING; + int length = buff.getLength() - buff.getPosition() - (ASCENDING == ord ? 0 : 1); + byte[] ret = new byte[length]; + buff.get(ret); + if (DESCENDING == ord) buff.get(); // throw away the termination marker. + ord.apply(ret, 0, ret.length); + return ret; + } + + /** + * Encode a null value. + */ + public static void encodeNull(ByteRange buff, Order ord) { + buff.put(ord.apply(NULL)); + } + + /** + * Encode anint32 value using the fixed-length encoding.
+ */
+ public static void encodeInt32(ByteRange buff, int val, Order ord) {
+ int start = buff.getPosition();
+ buff.put(FIXED_INT32);
+ buff.put((byte) ((val >> 24) ^ 0x80))
+ .put((byte) (val >> 16))
+ .put((byte) (val >> 8))
+ .put((byte) val);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ }
+
+ /**
+ * Decode an int32 value. The backing array is not modified
+ * through use of this method.
+ */
+ public static int decodeInt32(ByteRange buff) {
+ byte header = buff.get();
+ assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32);
+ Order ord = header == 0x27 ? ASCENDING : DESCENDING;
+ int val = (ord.apply(buff.get()) ^ 0x80) & 0xff;
+ for (int i = 1; i < 4; i++) {
+ val = (val << 8) + (ord.apply(buff.get()) & 0xff);
+ }
+ return val;
+ }
+
+ /**
+ * Encode an int64 value using the fixed-length encoding.
+ */
+ public static void encodeInt64(ByteRange buff, long val, Order ord) {
+ int start = buff.getPosition();
+ buff.put(FIXED_INT64);
+ buff.put((byte) ((val >> 56) ^ 0x80))
+ .put((byte) (val >> 48))
+ .put((byte) (val >> 40))
+ .put((byte) (val >> 32))
+ .put((byte) (val >> 24))
+ .put((byte) (val >> 16))
+ .put((byte) (val >> 8))
+ .put((byte) val);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ }
+
+ /**
+ * Decode an int64 value. The backing array is not modified
+ * through use of this method.
+ */
+ public static long decodeInt64(ByteRange buff) {
+ byte header = buff.get();
+ assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64);
+ Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING;
+ long val = (ord.apply(buff.get()) ^ 0x80) & 0xff;
+ for (int i = 1; i < 8; i++) {
+ val = (val << 8) + (ord.apply(buff.get()) & 0xff);
+ }
+ return val;
+ }
+
+ /**
+ * Encode a 32-bit floating point value using the fixed-length encoding.
+ * @see #decodeFloat32(ByteRange)
+ */
+ public static void encodeFloat32(ByteRange buff, float val, Order ord) {
+ int start = buff.getPosition();
+ int i = Float.floatToIntBits(val);
+ i ^= ((i >> Integer.SIZE - 1) | Integer.MIN_VALUE);
+ buff.put(FIXED_FLOAT32);
+ buff.put((byte) (i >> 24))
+ .put((byte) (i >> 16))
+ .put((byte) (i >> 8))
+ .put((byte) i);
+ ord.apply(buff.getBytes(), start, buff.getPosition() - start);
+ }
+
+ /**
+ * Decode a 32-bit floating point value using the fixed-length encoding.
+ * @see #encodeFloat32(ByteRange, float, Order)
+ */
+ public static float decodeFloat32(ByteRange buff) {
+ byte header = buff.get();
+ assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32);
+ Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING;
+ int val = ord.apply(buff.get()) & 0xff;
+ for (int i = 1; i < 4; i++) {
+ val = (val << 8) + (ord.apply(buff.get()) & 0xff);
+ }
+ val ^= (~val >> Integer.SIZE - 1) | Integer.MIN_VALUE;
+ return Float.intBitsToFloat(val);
+ }
+
+ /**
+ * Encode a 64-bit floating point value using the fixed-length encoding.
+ * + * This format ensures the following total ordering of floating point + * values: Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... < + * -Double.MIN_VALUE < -0.0 < +0.0; < Double.MIN_VALUE < ... + * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN + *
+ * Floating point numbers are encoded as specified in IEEE 754. A 64-bit + * double precision float consists of a sign bit, 11-bit unsigned exponent + * encoded in offset-1023 notation, and a 52-bit significand. The format is + * described further in the Double Precision + * Floating Point Wikipedia page + *+ * The value of a normal float is -1 sign bit × + * 2exponent - 1023 × 1.significand + *
+ *+ * The IEE754 floating point format already preserves sort ordering for + * positive floating point numbers when the raw bytes are compared in most + * significant byte order. This is discussed further at http://www.cygnus-software.com/papers/comparingfloats/comparingfloats. + * htm + *
+ *+ * Thus, we need only ensure that negative numbers sort in the the exact + * opposite order as positive numbers (so that say, negative infinity is + * less than negative 1), and that all negative numbers compare less than + * any positive number. To accomplish this, we invert the sign bit of all + * floating point numbers, and we also invert the exponent and significand + * bits if the floating point number was negative. + *
+ *
+ * More specifically, we first store the floating point bits into a 64-bit
+ * long l using {@link Double#doubleToLongBits}. This method
+ * collapses all NaNs into a single, canonical NaN value but otherwise
+ * leaves the bits unchanged. We then compute
+ *
+ * l ˆ= (l >> (Long.SIZE - 1)) | Long.MIN_SIZE + *+ *
+ * which inverts the sign bit and XOR's all other bits with the sign bit
+ * itself. Comparing the raw bytes of l in most significant
+ * byte order is equivalent to performing a double precision floating point
+ * comparison on the underlying bits (ignoring NaN comparisons, as NaNs
+ * don't compare equal to anything when performing floating point
+ * comparisons).
+ *
+ * The resulting long integer is then converted into a byte array by + * serializing the long one byte at a time in most significant byte order. + * The serialized integer is prefixed by a single header byte. All + * serialized values are 9 bytes in length. + *
+ */ + public static void encodeFloat64(ByteRange buff, double val, Order ord) { + int start = buff.getPosition(); + long lng = Double.doubleToLongBits(val); + lng ^= ((lng >> Long.SIZE - 1) | Long.MIN_VALUE); + buff.put(FIXED_FLOAT64); + buff.put((byte) (lng >> 56)) + .put((byte) (lng >> 48)) + .put((byte) (lng >> 40)) + .put((byte) (lng >> 32)) + .put((byte) (lng >> 24)) + .put((byte) (lng >> 16)) + .put((byte) (lng >> 8)) + .put((byte) lng); + ord.apply(buff.getBytes(), start, buff.getPosition() - start); + } + + /** + * Decode a 64-bit floating point value using the fixed-length encoding. + * @see #encodeFloat64(ByteRange, double, Order) + */ + public static double decodeFloat64(ByteRange buff) { + byte header = buff.get(); + assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64); + Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING; + long val = ord.apply(buff.get()) & 0xff; + for (int i = 1; i < 8; i++) { + val = (val << 8) + (ord.apply(buff.get()) & 0xff); + } + val ^= (~val >> Long.SIZE - 1) | Long.MIN_VALUE; + return Double.longBitsToDouble(val); + } + + /** + * Skipbuff's cursor forward one encoded value.
+ */
+ public static void skip(ByteRange buff) {
+ byte x = buff.get();
+ Order ord = (-1 == Integer.signum(x)) ? DESCENDING : ASCENDING;
+ x = ord.apply(x);
+
+ switch (x) {
+ case NULL:
+ case NEG_INF:
+ return;
+ case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */
+ skipVaruint64(buff, DESCENDING != ord);
+ skipSignificand(buff, DESCENDING != ord);
+ return;
+ case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */
+ case NEG_MED_MIN + 0x01:
+ case NEG_MED_MIN + 0x02:
+ case NEG_MED_MIN + 0x03:
+ case NEG_MED_MIN + 0x04:
+ case NEG_MED_MIN + 0x05:
+ case NEG_MED_MIN + 0x06:
+ case NEG_MED_MIN + 0x07:
+ case NEG_MED_MIN + 0x08:
+ case NEG_MED_MIN + 0x09:
+ case NEG_MED_MAX:
+ skipSignificand(buff, DESCENDING != ord);
+ return;
+ case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */
+ skipVaruint64(buff, DESCENDING == ord);
+ skipSignificand(buff, DESCENDING != ord);
+ return;
+ case ZERO:
+ return;
+ case POS_SMALL: /* Small positive number: 0x16, ~-E, M */
+ skipVaruint64(buff, DESCENDING != ord);
+ skipSignificand(buff, DESCENDING == ord);
+ return;
+ case POS_MED_MIN: /* Medium positive number: 0x17+E, M */
+ case POS_MED_MIN + 0x01:
+ case POS_MED_MIN + 0x02:
+ case POS_MED_MIN + 0x03:
+ case POS_MED_MIN + 0x04:
+ case POS_MED_MIN + 0x05:
+ case POS_MED_MIN + 0x06:
+ case POS_MED_MIN + 0x07:
+ case POS_MED_MIN + 0x08:
+ case POS_MED_MIN + 0x09:
+ case POS_MED_MAX:
+ skipSignificand(buff, DESCENDING == ord);
+ return;
+ case POS_LARGE: /* Large positive number: 0x22, E, M */
+ skipVaruint64(buff, DESCENDING == ord);
+ skipSignificand(buff, DESCENDING == ord);
+ return;
+ case POS_INF:
+ return;
+ case NAN:
+ return;
+ case FIXED_INT32:
+ buff.setPosition(buff.getPosition() + 4);
+ return;
+ case FIXED_INT64:
+ buff.setPosition(buff.getPosition() + 8);
+ return;
+ case FIXED_FLOAT32:
+ buff.setPosition(buff.getPosition() + 4);
+ return;
+ case FIXED_FLOAT64:
+ buff.setPosition(buff.getPosition() + 8);
+ return;
+ case TEXT:
+ // for null-terminated values, skip to the end.
+ do {
+ x = ord.apply(buff.get());
+ } while (x != TERM);
+ return;
+ case BLOB_VAR:
+ // read until we find a 0 in the MSB
+ do {
+ x = ord.apply(buff.get());
+ } while ((byte) (x & 0x80) != TERM);
+ return;
+ case BLOB_COPY:
+ if (Order.DESCENDING == ord) {
+ // if descending, read to termination byte.
+ do {
+ x = ord.apply(buff.get());
+ } while (x != TERM);
+ } else {
+ // otherwise, just skip to the end.
+ buff.setPosition(buff.getLength());
+ }
+ return;
+ default:
+ throw new IllegalArgumentException("unexpected value in first byte: 0x"
+ + Long.toHexString(x));
+ }
+ }
+
+ /**
+ * Return the number of encoded entries remaining in buff. The
+ * state of buff is not modified through use of this method.
+ */
+ public static int length(ByteRange buff) {
+ ByteRange b = buff.shallowCopy();
+ b.setPosition(buff.getPosition());
+ int cnt = 0;
+ for (cnt = 0; b.getPosition() != b.getLength(); cnt++) { skip(b); }
+ return cnt;
+ }
+}
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/types/TestOrderedBlob.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/types/TestOrderedBlob.java
new file mode 100644
index 0000000..2463be5
--- /dev/null
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/types/TestOrderedBlob.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.types;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.ByteRangeUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+
+public class TestOrderedBlob {
+
+ static final byte[][] VALUES = new byte[][] {
+ null, Bytes.toBytes(""), Bytes.toBytes("1"), Bytes.toBytes("22"), Bytes.toBytes("333"),
+ Bytes.toBytes("4444"), Bytes.toBytes("55555"), Bytes.toBytes("666666"),
+ Bytes.toBytes("7777777"), Bytes.toBytes("88888888"), Bytes.toBytes("999999999"),
+ };
+
+ @Test
+ public void testEncodedLength() {
+ ByteRange buff = new ByteRange(20);
+ for (DataType
+ * Building sqlite4 with -DVARINT_TOOL provides this reference:
+ * $ ./varint_tool 240 2287 67823 16777215 4294967295 1099511627775
+ * 281474976710655 72057594037927935 18446744073709551615
+ *
+ * 240 = f0
+ * 2287 = f8ff
+ * 67823 = f9ffff
+ * 16777215 = faffffff
+ * 4294967295 = fbffffffff
+ * 1099511627775 = fcffffffffff
+ * 281474976710655 = fdffffffffffff
+ * 72057594037927935 = feffffffffffffff
+ * 9223372036854775807 = ff7fffffffffffffff (Long.MAX_VAL)
+ * 9223372036854775808 = ff8000000000000000 (Long.MIN_VAL)
+ * 18446744073709551615 = ffffffffffffffffff