len bytes of data from the given offset into an array of bytes.
+ * @param offset The src offset within this ByteInput from where data to be read.
+ * @param out Destination byte array to read data into.
+ * @param outOffset Offset within the out byte[] where data to be read into.
+ * @param len The number of bytes to read.
+ * @return The number of bytes read from ByteInput
+ */
+ public abstract int read(int offset, byte[] out, int outOffset, int len);
+
+ /**
+  * Reads bytes of data from the given offset into given {@link ByteBuffer}.
+  * NOTE(review): this declaration takes no explicit length, so how many bytes are transferred is
+  * left to the implementation (presumably bounded by the space available in {@code out}) -
+  * confirm against the concrete implementations.
+  * @param offset The src offset within this ByteInput from where data to be read.
+  * @param out Destination {@link ByteBuffer} to read data into.
+  * @return The number of bytes read from ByteInput
+  */
+ public abstract int read(int offset, ByteBuffer out);
+
+ /**
+  * Reports how many bytes this ByteInput holds in total.
+  * @return Total number of bytes in this ByteInput.
+  */
+ public abstract int size();
+}
diff --git a/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/com/google/protobuf/ByteInputByteString.java b/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/com/google/protobuf/ByteInputByteString.java
new file mode 100644
index 0000000..1949602
--- /dev/null
+++ b/hbase-protocol-shaded/src/main/java/org/apache/hadoop/hbase/shaded/com/google/protobuf/ByteInputByteString.java
@@ -0,0 +1,249 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package org.apache.hadoop.hbase.shaded.com.google.protobuf;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InvalidObjectException;
+import java.io.ObjectInputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A {@link ByteString} that wraps around a {@link ByteInput}.
+ */
+final class ByteInputByteString extends ByteString.LeafByteString {
+ private final ByteInput buffer;
+ private final int offset, length;
+
+ /**
+  * Creates a string backed by {@code buffer}, covering {@code length} bytes that start at
+  * {@code offset}. Only {@code buffer} is checked; {@code offset} and {@code length} are stored
+  * as given.
+  *
+  * @param buffer the backing {@link ByteInput}
+  * @param offset position within {@code buffer} of this string's first byte
+  * @param length number of bytes reported by {@link #size()}
+  * @throws NullPointerException if {@code buffer} is null
+  */
+ ByteInputByteString(ByteInput buffer, int offset, int length) {
+   if (null == buffer) {
+     throw new NullPointerException("buffer");
+   }
+   this.offset = offset;
+   this.length = length;
+   this.buffer = buffer;
+ }
+
+ // =================================================================
+ // Serializable
+
+ /**
+  * Serialization hook: substitutes this instance with the {@link ByteString} obtained by
+  * wrapping the result of {@code toByteArray()}, so this class itself is never written out.
+  */
+ private Object writeReplace() {
+   final byte[] contents = toByteArray();
+   return ByteString.wrap(contents);
+ }
+
+ /**
+  * Deserialization hook: always fails, because {@code writeReplace()} substitutes another
+  * object before an instance of this class could be written.
+  *
+  * @throws IOException always, as an {@link InvalidObjectException}
+  */
+ private void readObject(@SuppressWarnings("unused") ObjectInputStream in) throws IOException {
+   // TODO check here
+   final String reason = "ByteInputByteString instances are not to be serialized directly";
+   throw new InvalidObjectException(reason);
+ }
+
+ // =================================================================
+
+ /**
+  * Returns the byte at {@code index}, where the index is relative to the start of this string
+  * (not to the start of the backing {@link ByteInput}).
+  *
+  * @param index position within this string, {@code 0 <= index < size()}
+  * @return the byte read from the backing input at {@code offset + index}
+  * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length
+  */
+ @Override
+ public byte byteAt(int index) {
+   // Without this check a substring view could read backing bytes outside its own window.
+   if (index < 0 || index >= length) {
+     throw new IndexOutOfBoundsException("Index: " + index + ", size: " + length);
+   }
+   return buffer.read(getAbsoluteOffset(index));
+ }
+
+ /**
+  * Translates a position relative to this string into a position in the backing input by
+  * adding this string's start offset.
+  */
+ private int getAbsoluteOffset(int relativeOffset) {
+   final int base = this.offset;
+   return base + relativeOffset;
+ }
+
+ /** Returns the length this string was constructed with. */
+ @Override
+ public int size() {
+   return this.length;
+ }
+
+ /**
+  * Returns a view of the bytes in {@code [beginIndex, endIndex)} that shares this string's
+  * backing {@link ByteInput}; no bytes are copied.
+  *
+  * @param beginIndex index of the first byte to include, relative to this string
+  * @param endIndex index one past the last byte to include; may equal {@code size()}
+  * @return a new string of {@code endIndex - beginIndex} bytes over the same backing input
+  * @throws IllegalArgumentException if {@code beginIndex < 0}, {@code endIndex > size()} or
+  *         {@code beginIndex > endIndex}
+  */
+ @Override
+ public ByteString substring(int beginIndex, int endIndex) {
+   final int size = size();
+   // endIndex is exclusive, so endIndex == size (and the empty range beginIndex == endIndex ==
+   // size) is valid; the range is only invalid once it reaches past the end.
+   // NOTE(review): the exception type is kept as-is for existing callers; confirm whether the
+   // ByteString contract expects IndexOutOfBoundsException here instead.
+   if (beginIndex < 0 || endIndex > size || beginIndex > endIndex) {
+     throw new IllegalArgumentException(
+         String.format("Invalid indices [%d, %d]", beginIndex, endIndex));
+   }
+   return new ByteInputByteString(this.buffer, getAbsoluteOffset(beginIndex), endIndex - beginIndex);
+ }
+
+ /**
+  * Reads {@code numberToCopy} bytes from the backing input into {@code target}.
+  *
+  * @param target destination array
+  * @param sourceOffset start position relative to this string
+  * @param targetOffset position in {@code target} to start writing at
+  * @param numberToCopy number of bytes requested
+  */
+ @Override
+ protected void copyToInternal(
+     byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
+   final int from = getAbsoluteOffset(sourceOffset);
+   buffer.read(from, target, targetOffset, numberToCopy);
+ }
+
+ /**
+  * Copies all {@code size()} bytes of this string into {@code target} at its current position.
+  *
+  * @param target buffer to copy into
+  * @throws java.nio.BufferOverflowException if {@code target} has fewer than {@code size()}
+  *         bytes remaining
+  * @throws java.nio.ReadOnlyBufferException if {@code target} is read-only
+  */
+ @Override
+ public void copyTo(ByteBuffer target) {
+   // Handing the target straight to ByteInput.read(offset, target) passes no length, so the
+   // amount copied would be decided by the input/target rather than by this string's size.
+   // Going through toByteArray() pins the copy to exactly this string's bytes, at the cost of
+   // one intermediate array.
+   target.put(toByteArray());
+ }
+
+ /**
+  * Writes the result of {@code toByteArray()} to {@code out}.
+  *
+  * @param out stream to write to
+  * @throws IOException if the write fails
+  */
+ @Override
+ public void writeTo(OutputStream out) throws IOException {
+   // TODO
+   final byte[] contents = toByteArray();
+   out.write(contents);
+ }
+
+ /**
+  * Compares the first {@code length} bytes of this string with the {@code length} bytes of
+  * {@code other} that start at {@code offset}, by comparing the two substrings with
+  * {@code equals}.
+  *
+  * @param other string to compare against
+  * @param offset start position within {@code other}
+  * @param length number of bytes to compare
+  * @return whether the two substrings are equal
+  */
+ @Override
+ boolean equalsRange(ByteString other, int offset, int length) {
+   // The parameters shadow the fields of the same name; both refer to the arguments here.
+   final ByteString mine = substring(0, length);
+   final ByteString theirs = other.substring(offset, offset + length);
+   return mine.equals(theirs);
+ }
+
+ /**
+  * Writes {@code numberToWrite} bytes, starting at {@code sourceOffset} relative to this string,
+  * to {@code out} by staging them in a byte[] obtained from
+  * {@code ByteBufferWriter.getOrCreateBuffer}.
+  *
+  * @param out stream to write to
+  * @param sourceOffset start position relative to this string
+  * @param numberToWrite number of bytes to write
+  * @throws IOException if the write fails
+  */
+ @Override
+ void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite) throws IOException {
+   final byte[] scratch = ByteBufferWriter.getOrCreateBuffer(numberToWrite);
+   final int from = getAbsoluteOffset(sourceOffset);
+   buffer.read(from, scratch, 0, numberToWrite);
+   out.write(scratch, 0, numberToWrite);
+ }
+
+ /**
+  * Passes the result of {@code toByteArray()} to {@code output.writeLazy}, covering the range
+  * from 0 to this string's length.
+  *
+  * @param output destination to write to
+  * @throws IOException if the write fails
+  */
+ @Override
+ void writeTo(ByteOutput output) throws IOException {
+   final byte[] contents = toByteArray();
+   output.writeLazy(contents, 0, this.length);
+ }
+
+ /**
+  * Returns a read-only {@link ByteBuffer} wrapping the array produced by {@code toByteArray()}.
+  */
+ @Override
+ public ByteBuffer asReadOnlyByteBuffer() {
+   final ByteBuffer wrapped = ByteBuffer.wrap(toByteArray());
+   return wrapped.asReadOnlyBuffer();
+ }
+
+ @Override
+ public ListSelects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct) @@ -610,6 +638,169 @@ final class Utf8 { } } + public boolean isValidUtf8(ByteInput buffer, int index, int limit) { + return partialIsValidUtf8(COMPLETE, buffer, index, limit) == COMPLETE; + } + + int partialIsValidUtf8(int state, ByteInput bytes, int index, int limit) { + if (state != COMPLETE) { + // The previous decoding operation was incomplete (or malformed). + // We look for a well-formed sequence consisting of bytes from + // the previous decoding operation (stored in state) together + // with bytes from the array slice. + // + // We expect such "straddler characters" to be rare. + + if (index >= limit) { // No bytes? No progress. + return state; + } + int byte1 = (byte) state; + // byte1 is never ASCII. + if (byte1 < (byte) 0xE0) { + // two-byte form + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + // byte2 trailing-byte test + || bytes.read(index++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + // Get byte2 from saved state or array + int byte2 = (byte) ~(state >> 8); + if (byte2 == 0) { + byte2 = bytes.read(index++); + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } + if (byte2 > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // illegal surrogate codepoint? 
+ || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || bytes.read(index++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + // Get byte2 and byte3 from saved state or array + int byte2 = (byte) ~(state >> 8); + int byte3 = 0; + if (byte2 == 0) { + byte2 = bytes.read(index++); + if (index >= limit) { + return incompleteStateFor(byte1, byte2); + } + } else { + byte3 = (byte) (state >> 16); + } + if (byte3 == 0) { + byte3 = bytes.read(index++); + if (index >= limit) { + return incompleteStateFor(byte1, byte2, byte3); + } + } + + // If we were called with state == MALFORMED, then byte1 is 0xFF, + // which never occurs in well-formed UTF-8, and so we will return + // MALFORMED again below. + + if (byte2 > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || byte3 > (byte) 0xBF + // byte4 trailing-byte test + || bytes.read(index++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + + return partialIsValidUtf8(bytes, index, limit); + } + + private static int partialIsValidUtf8(ByteInput bytes, int index, int limit) { + // Optimize for 100% ASCII (Hotspot loves small simple top-level loops like this). + // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII). + while (index < limit && bytes.read(index) >= 0) { + index++; + } + + return (index >= limit) ? COMPLETE : partialIsValidUtf8NonAscii(bytes, index, limit); + } + + private static int partialIsValidUtf8NonAscii(ByteInput bytes, int index, int limit) { + for (;;) { + int byte1, byte2; + + // Optimize for interior runs of ASCII bytes. 
+ do { + if (index >= limit) { + return COMPLETE; + } + } while ((byte1 = bytes.read(index++)) >= 0); + + if (byte1 < (byte) 0xE0) { + // two-byte form + + if (index >= limit) { + // Incomplete sequence + return byte1; + } + + // Simultaneously checks for illegal trailing-byte in + // leading position and overlong 2-byte form. + if (byte1 < (byte) 0xC2 + || bytes.read(index++) > (byte) 0xBF) { + return MALFORMED; + } + } else if (byte1 < (byte) 0xF0) { + // three-byte form + + if (index >= limit - 1) { // incomplete sequence + return incompleteStateFor(bytes, index, limit); + } + if ((byte2 = bytes.read(index++)) > (byte) 0xBF + // overlong? 5 most significant bits must not all be zero + || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) + // check for illegal surrogate codepoints + || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) + // byte3 trailing-byte test + || bytes.read(index++) > (byte) 0xBF) { + return MALFORMED; + } + } else { + // four-byte form + + if (index >= limit - 2) { // incomplete sequence + return incompleteStateFor(bytes, index, limit); + } + if ((byte2 = bytes.read(index++)) > (byte) 0xBF + // Check that 1 <= plane <= 16. Tricky optimized form of: + // if (byte1 > (byte) 0xF4 || + // byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 || + // byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F) + || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 + // byte3 trailing-byte test + || bytes.read(index++) > (byte) 0xBF + // byte4 trailing-byte test + || bytes.read(index++) > (byte) 0xBF) { + return MALFORMED; + } + } + } + } + /** * Encodes an input character sequence ({@code in}) to UTF-8 in the target array ({@code out}). * For a string, this method is similar to