From e05af0228d4616bb168ce5276bb6ddf3b526efec Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Mon, 5 Aug 2013 14:21:33 -0700 Subject: [PATCH] HBASE-9091 Introduce PositionedByteRange Add PositionedByteRange as an extension of ByteRange. Includes a position marker for tracking a consumer's place within a range. Also update and clarify documentation. This class starts to become a mutable alternative to java.nio.HeapByteBuffer. --- .../org/apache/hadoop/hbase/util/ByteRange.java | 384 +++++++++++++++------ .../apache/hadoop/hbase/util/ByteRangeTool.java | 65 ---- .../apache/hadoop/hbase/util/ByteRangeUtils.java | 70 ++++ .../hadoop/hbase/util/PositionedByteRange.java | 254 ++++++++++++++ .../apache/hadoop/hbase/util/TestByteRange.java | 18 +- .../hadoop/hbase/util/TestPositionedByteRange.java | 68 ++++ .../codec/prefixtree/encode/row/RowNodeWriter.java | 4 +- .../prefixtree/encode/tokenize/TokenizerNode.java | 2 +- .../codec/prefixtree/column/TestColumnBuilder.java | 4 +- .../column/data/TestColumnDataSimple.java | 6 +- 10 files changed, 694 insertions(+), 181 deletions(-) delete mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeUtils.java create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/util/PositionedByteRange.java create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestPositionedByteRange.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java index 6f91861..ae9a93c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java @@ -18,98 +18,182 @@ package org.apache.hadoop.hbase.util; - - +import org.apache.hadoop.classification.InterfaceAudience; +import 
org.apache.hadoop.classification.InterfaceStability; /** - * Lightweight, reusable class for specifying ranges of byte[]'s. CompareTo and equals methods are - * lexicographic, which is native to HBase. - *

+ * Lightweight, reusable class for specifying ranges of byte[]'s. + *

+ * {@code ByteRange} maintains an underlying byte[] and a viewport into that + * byte[] as a range of bytes. The {@code ByteRange} is a mutable, reusable + * object, so the underlying byte[] can be modified after instantiation. This + * is done using the {@link #set(byte[])} and {@link #unset()} methods. Direct + * access to the byte[] is also available via {@link #getBytes()}. The viewport + * is defined by an {@code offset} into the byte[] and a {@code length}. The + * range of bytes is 0-indexed, and is accessed by index via the + * {@link #get(int)} and {@link #put(int, byte)} methods. + *

+ *

* This class differs from ByteBuffer: - *

  • On-heap bytes only - *
  • Implements equals, hashCode, and compareTo so that it can be used in standard java - * Collections, similar to String. - *
  • Does not maintain mark/position iterator state inside the class. Doing so leads to many bugs - * in complex applications. - *
  • Allows the addition of simple core methods like this.copyTo(that, offset). - *
  • Can be reused in tight loops like a major compaction which can save significant amounts of - * garbage. - *
  • (Without reuse, we throw off garbage like this thing: - * http://www.youtube.com/watch?v=lkmBH-MjZF4 - *

    - * Mutable, and always evaluates equals, hashCode, and compareTo based on the current contents. - *

    - * Can contain convenience methods for comparing, printing, cloning, spawning new arrays, copying to - * other arrays, etc. Please place non-core methods into {@link ByteRangeTool}. - *

    - * We may consider converting this to an interface and creating separate implementations for a - * single byte[], a paged byte[] (growable byte[][]), a ByteBuffer, etc + *

  • On-heap bytes only
  • + *
  • Raw {@code byte} access only; does not encode other primitives.
  • + *
  • Implements {@link #equals(Object)}, {@link #hashCode()}, and + * {@link #compareTo(ByteRange)} so that it can be used in standard java + * Collections. Comparison operations are lexicographic, which is native to + * HBase.
  • + *
  • Allows the addition of simple core methods like the deep and shallow + * copy methods.
  • + *
  • Can be reused in tight loops like a major compaction which can save + * significant amounts of garbage. (Without reuse, we throw off garbage like + * this thing.)
  • + *

    + *

    + * Mutable, and always evaluates {@link #equals(Object)}, {@link #hashCode()}, + * and {@link #compareTo(ByteRange)} based on the current contents. + *

    + *

    + * Can contain convenience methods for comparing, printing, cloning, spawning + * new arrays, copying to other arrays, etc. Please place non-core methods into + * {@link ByteRangeUtils}. + *

    + *

    + * We may consider converting this to an interface and creating separate + * implementations for a single byte[], a paged byte[] (growable byte[][]), a + * ByteBuffer, etc + *

    */ +@InterfaceAudience.Public +@InterfaceStability.Evolving public class ByteRange implements Comparable { private static final int UNSET_HASH_VALUE = -1; - - /********************** fields *****************************/ - - // Do not make these final, as the intention is to reuse objects of this class + // Note to maintainers: Do not make these final, as the intention is to + // reuse objects of this class /** - * The array containing the bytes in this range. It will be >= length. + * The array containing the bytes in this range. It will be >= length. */ - private byte[] bytes; + protected byte[] bytes; /** - * The index of the first byte in this range. ByteRange.get(0) will return bytes[offset]. + * The index of the first byte in this range. {@code ByteRange.get(0)} will + * return bytes[offset]. */ - private int offset; + protected int offset; /** * The number of bytes in the range. Offset + length must be <= bytes.length */ - private int length; + protected int length; /** - * Variable for lazy-caching the hashCode of this range. Useful for frequently used ranges, - * long-lived ranges, or long ranges. + * Variable for lazy-caching the hashCode of this range. Useful for + * frequently used ranges, long-lived ranges, or long ranges. */ private int hash = UNSET_HASH_VALUE; - - /********************** construct ***********************/ - + /** + * Create a new {@code ByteRange} lacking a backing array and with an + * undefined viewport. + */ public ByteRange() { - set(new byte[0]);//Could probably get away with a null array if the need arises. + unset(); } + /** + * Create a new {@code ByteRange} over a new backing array of size + * {@code capacity}. The range's offset and length are 0 and {@code capacity}, + * respectively. + * @param capacity the size of the backing array. + */ + public ByteRange(int capacity) { + this(new byte[capacity]); + } + + /** + * Create a new {@code ByteRange} over the provided {@code bytes}. + * @param bytes The array to wrap. 
+ */ public ByteRange(byte[] bytes) { set(bytes); } + /** + * Create a new {@code ByteRange} over the provided {@code bytes}. + * @param bytes The array to wrap. + * @param offset The offset into {@code bytes} considered the beginning + * of this range. + * @param length The length of this range. + */ public ByteRange(byte[] bytes, int offset, int length) { set(bytes, offset, length); } + // + // methods for managing the backing array and range viewport + // - /********************** write methods *************************/ + /** + * The underlying byte[]. + */ + public byte[] getBytes() { + return bytes; + } - public ByteRange clear() { + /** + * Nullifies this ByteRange. That is, it becomes a husk, being a range over + * no byte[] whatsoever. + * @return this + */ + public ByteRange unset() { clearHashCache(); - bytes = null; - offset = 0; - length = 0; + this.bytes = null; + this.offset = 0; + this.length = 0; return this; } + /** + * Reuse this {@code ByteRange} over a new byte[]. {@code offset} is set to + * 0 and {@code length} is set to {@code capacity}. + * @param capacity the size of a new byte[]. + * @return this + */ + public ByteRange set(int capacity) { + return set(new byte[capacity]); + } + + /** + * Reuse this {@code ByteRange} over a new byte[]. {@code offset} is set to + * 0 and {@code length} is set to {@code bytes.length}. A null {@code bytes} + * IS supported, in which case this method will behave equivalently to + * {@link #unset()}. + * @param bytes the array to wrap. + * @return this + */ public ByteRange set(byte[] bytes) { + if (null == bytes) return unset(); clearHashCache(); this.bytes = bytes; this.offset = 0; - this.length = ArrayUtils.length(bytes); + this.length = bytes.length; return this; } + /** + * Reuse this {@code ByteRange} over a new byte[]. A null {@code bytes} IS + * supported, in which case this method will behave equivalently to + * {@link #unset()}, regardless of the values of {@code offset} and + * {@code length}. 
+ * @param bytes The array to wrap. + * @param offset The offset into {@code bytes} considered the beginning of + * this range. + * @param length The length of this range. + * @return this. + */ public ByteRange set(byte[] bytes, int offset, int length) { + if (null == bytes) return unset(); clearHashCache(); this.bytes = bytes; this.offset = offset; @@ -117,25 +201,144 @@ public class ByteRange implements Comparable { return this; } - public void setLength(int length) { + /** + * The offset, the index into the underlying byte[] at which this range + * begins. + * @see #getBytes(); + */ + public int getOffset() { + return offset; + } + + /** + * Update the beginning of this range. {@code offset + length} may not be + * greater than {@code bytes.length}. + * @param offset the new start of this range. + * @return this. + */ + public ByteRange setOffset(int offset) { + clearHashCache(); + this.offset = offset; + return this; + } + + /** + * The length of the range. + */ + public int getLength() { + return length; + } + + /** + * Update the length of this range. {@code offset + length} should not be + * greater than {@code bytes.length}. + * @param length The new length of this range. + * @return this. + */ + public ByteRange setLength(int length) { clearHashCache(); this.length = length; + return this; } + /** + * @return true when this range is of zero length, false otherwise. + */ + public boolean isEmpty() { + return isEmpty(this); + } + + /** + * @return true when {@code range} is of zero length, false otherwise. + */ + public static boolean isEmpty(ByteRange range) { + return range == null || range.length == 0; + } - /*********** read methods (add non-core methods to ByteRangeUtils) *************/ + // + // methods for managing position and retrieving data + // /** - * @param index zero-based index - * @return single byte at index + * Retrieve the byte at position {@code index}. + * @param index zero-based index into this range. + * @return single byte at index. 
*/ public byte get(int index) { return bytes[offset + index]; } /** - * Instantiate a new byte[] with exact length, which is at least 24 bytes + length. Copy the - * contents of this range into it. + * Fill {@code dst} with bytes from the range, starting from {@code index}. + * @param index zero-based index into this range. + * @param dst the destination of the copy. + * @return this. + */ + public ByteRange get(int index, byte[] dst) { + if (0 == dst.length) return this; + return get(index, dst, 0, dst.length); + } + + /** + * Fill {@code dst} with bytes from the range, starting from {@code index}. + * {@code length} bytes are copied into {@code dst}, starting at {@code offset}. + * @param index zero-based index into this range. + * @param dst the destination of the copy. + * @param offset the offset into {@code dst} to start the copy. + * @param length the number of bytes to copy into {@code dst}. + * @return this. + */ + public ByteRange get(int index, byte[] dst, int offset, int length) { + if (0 == length) return this; + System.arraycopy(this.bytes, this.offset + index, dst, offset, length); + return this; + } + + /** + * Store {@code val} at {@code index}. + * @param index the index in the range where {@code val} is stored. + * @param val the value to store. + * @return this. + */ + public ByteRange put(int index, byte val) { + bytes[offset + index] = val; + return this; + } + + /** + * Store {@code val} at {@code index}. + * @param index the index in the range where {@code val} is stored. + * @param val the value to store. + * @return this. + */ + public ByteRange put(int index, byte[] val) { + if (0 == val.length) return this; + return put(index, val, 0, val.length); + } + + /** + * Store {@code length{@code bytes from {@code val} into this range, starting at + * {@code index}. Bytes from {@code val} are copied starting at {@code offset} + * into the range. + * @param index position in this range to start the copy. + * @param val the value to store. 
+ * @param offset the offset in {@code val} from which to start copying. + * @param length the number of bytes to copy from {@code val}. + * @return this. + */ + public ByteRange put(int index, byte[] val, int offset, int length) { + if (0 == length) return this; + System.arraycopy(val, offset, this.bytes, this.offset + index, length); + return this; + } + + // + // methods for duplicating the current instance + // + + /** + * Instantiate a new byte[] with exact length, which is at least 24 bytes + + * length. Copy the contents of this range into it. * @return The newly cloned byte[]. */ public byte[] deepCopyToNewArray() { @@ -145,20 +348,19 @@ public class ByteRange implements Comparable { } /** - * Create a new ByteRange with new backing byte[] and copy the state of this range into the new - * range. Copy the hash over if it is already calculated. + * Create a new {@code ByteRange} with new backing byte[] and copy the state + * of this range into the new range. Copy the hash over if it is already + * calculated. Offset, position, and length are not preserved. * @return Deep copy */ public ByteRange deepCopy() { ByteRange clone = new ByteRange(deepCopyToNewArray()); - if (isHashCached()) { - clone.hash = hash; - } return clone; } /** - * Wrapper for System.arraycopy. Copy the contents of this range into the provided array. + * Wrapper for System.arraycopy. Copy the contents of this range into the + * provided array. * @param destination Copy to this array * @param destinationOffset First index in the destination array. */ @@ -167,9 +369,10 @@ public class ByteRange implements Comparable { } /** - * Wrapper for System.arraycopy. Copy the contents of this range into the provided array. - * @param innerOffset Start copying from this index in this source ByteRange. First byte copied is - * bytes[offset + innerOffset] + * Wrapper for System.arraycopy. Copy the contents of this range into the + * provided array. 
+ * @param innerOffset Start copying from this index in this source + * ByteRange. First byte copied is bytes[offset + innerOffset] * @param copyLength Copy this many bytes * @param destination Copy to this array * @param destinationOffset First index in the destination array. @@ -180,12 +383,28 @@ public class ByteRange implements Comparable { } /** - * Create a new ByteRange that points at this range's byte[]. The new range can have different - * values for offset and length, but modifying the shallowCopy will modify the bytes in this - * range's array. Pass over the hash code if it is already cached. + * Create a new {@code ByteRange} that points at this range's byte[]. + * Modifying the shallowCopy will modify the bytes in this range's array. + * Pass over the hash code if it is already cached. Position is not preserved + * in the copy. + * @return new {@code ByteRange} object referencing this range's byte[]. + */ + public ByteRange shallowCopy() { + ByteRange clone = new ByteRange(bytes, offset, length); + if (isHashCached()) { + clone.hash = hash; + } + return clone; + } + + /** + * Create a new {@code ByteRange} that points at this range's byte[]. The new + * range can have different values for offset and length, but modifying the + * shallowCopy will modify the bytes in this range's array. Pass over the + * hash code if it is already cached. Position is not preserved in the copy. * @param innerOffset First byte of clone will be this.offset + copyOffset. * @param copyLength Number of bytes in the clone. - * @return new ByteRange object referencing this range's byte[]. + * @return new {@code ByteRange} object referencing this range's byte[]. 
*/ public ByteRange shallowCopySubRange(int innerOffset, int copyLength) { ByteRange clone = new ByteRange(bytes, offset + innerOffset, copyLength); @@ -195,7 +414,7 @@ public class ByteRange implements Comparable { return clone; } - //TODO move to ByteRangeUtils because it is non-core method + //TODO: move to ByteRangeUtils because it is non-core method public int numEqualPrefixBytes(ByteRange that, int thatInnerOffset) { int maxCompares = Math.min(length, that.length - thatInnerOffset); for (int i = 0; i < maxCompares; ++i) { @@ -206,38 +425,9 @@ public class ByteRange implements Comparable { return maxCompares; } - public byte[] getBytes() { - return bytes; - } - - public int getOffset() { - return offset; - } - - public int getLength() { - return length; - } - - public boolean isEmpty(){ - return isEmpty(this); - } - - public boolean notEmpty(){ - return notEmpty(this); - } - - - /******************* static methods ************************/ - - public static boolean isEmpty(ByteRange range){ - return range == null || range.length == 0; - } - - public static boolean notEmpty(ByteRange range){ - return range != null && range.length > 0; - } - - /******************* standard methods *********************/ + // + // methods used for comparison + // @Override public boolean equals(Object thatObject) { @@ -278,7 +468,7 @@ public class ByteRange implements Comparable { return hash != UNSET_HASH_VALUE; } - private void clearHashCache() { + protected void clearHashCache() { hash = UNSET_HASH_VALUE; } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java deleted file mode 100644 index dd7cce7..0000000 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.util; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collection; - -import com.google.common.collect.Lists; - -/** - * Utility methods {@link ByteRange}. - */ -public class ByteRangeTool { - - public static ArrayList copyToNewArrays(Collection ranges) { - if (ranges == null) { - return new ArrayList(0); - } - ArrayList arrays = Lists.newArrayListWithCapacity(ranges.size()); - for (ByteRange range : ranges) { - arrays.add(range.deepCopyToNewArray()); - } - return arrays; - } - - public static ArrayList fromArrays(Collection arrays) { - if (arrays == null) { - return new ArrayList(0); - } - ArrayList ranges = Lists.newArrayListWithCapacity(arrays.size()); - for (byte[] array : arrays) { - ranges.add(new ByteRange(array)); - } - return ranges; - } - - public static void write(OutputStream os, ByteRange byteRange) throws IOException { - os.write(byteRange.getBytes(), byteRange.getOffset(), byteRange.getLength()); - } - - public static void write(OutputStream os, ByteRange byteRange, int byteRangeInnerOffset) - throws IOException { - os.write(byteRange.getBytes(), byteRange.getOffset() + byteRangeInnerOffset, - byteRange.getLength() - byteRangeInnerOffset); - } - -} diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeUtils.java new file mode 100644 index 0000000..29f5100 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeUtils.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collection; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import com.google.common.collect.Lists; + +/** + * Utility methods {@link ByteRange}. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ByteRangeUtils { + + public static ArrayList copyToNewArrays(Collection ranges) { + if (ranges == null) { + return new ArrayList(0); + } + ArrayList arrays = Lists.newArrayListWithCapacity(ranges.size()); + for (ByteRange range : ranges) { + arrays.add(range.deepCopyToNewArray()); + } + return arrays; + } + + public static ArrayList fromArrays(Collection arrays) { + if (arrays == null) { + return new ArrayList(0); + } + ArrayList ranges = Lists.newArrayListWithCapacity(arrays.size()); + for (byte[] array : arrays) { + ranges.add(new ByteRange(array)); + } + return ranges; + } + + public static void write(OutputStream os, ByteRange byteRange) throws IOException { + os.write(byteRange.getBytes(), byteRange.getOffset(), byteRange.getLength()); + } + + public static void write(OutputStream os, ByteRange byteRange, int byteRangeInnerOffset) + throws IOException { + os.write(byteRange.getBytes(), byteRange.getOffset() + byteRangeInnerOffset, + byteRange.getLength() - byteRangeInnerOffset); + } + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/PositionedByteRange.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/PositionedByteRange.java new file mode 100644 index 0000000..bfe452d --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/PositionedByteRange.java @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.nio.ByteBuffer; + +import com.google.common.annotations.VisibleForTesting; + +/** + *

    + * Extends {@link ByteRange} with additional methods to support tracking a + * consumer's position within the viewport. The API is extended with methods + * {@link #get()} and {@link #put(byte)} for interacting with the backing + * array from the current position forward. This frees the caller from managing + * their own index into the array. {@code position} is considered transient, + * not fundamental to the definition of the range, and does not participate in + * comparison or copy operations. + *

    + *

    + * Designed to be a slimmed-down, mutable alternative to {@link ByteBuffer}. + *

    + */ +public class PositionedByteRange extends ByteRange { + + /** + * The current index into the range. Like {@link ByteBuffer} position, it + * points to the next value that will be read/written in the array. It + * provides the appearance of being 0-indexed, even though its value is + * calculated according to offset. + *

    + * Position is considered transient and does not participate in + * {@link #equals(Object)} or {@link #hashCode()} comparisons. + *

    + */ + private int position = 0; + + /** + * Create a new {@code PositionedByteRange} lacking a backing array and with + * an undefined viewport. + */ + public PositionedByteRange() { + super(); + } + + /** + * Create a new {@code PositionedByteRange} over a new backing array of + * size {@code capacity}. The range's offset and length are 0 and + * {@code capacity}, respectively. + * @param capacity the size of the backing array. + */ + public PositionedByteRange(int capacity) { + super(capacity); + } + + /** + * Create a new {@code PositionedByteRange} over the provided {@code bytes}. + * @param bytes The array to wrap. + */ + public PositionedByteRange(byte[] bytes) { + super(bytes); + } + + /** + * Create a new {@code PositionedByteRange} over the provided {@code bytes}. + * @param bytes The array to wrap. + * @param offset The offset into {@code bytes} considered the beginning + * of this range. + * @param length The length of this range. + */ + public PositionedByteRange(byte[] bytes, int offset, int length) { + super(bytes, offset, length); + } + + @Override + public PositionedByteRange unset() { + this.position = 0; + super.unset(); + return this; + } + + @Override + public PositionedByteRange set(int capacity) { + this.position = 0; + super.set(capacity); + return this; + } + + @Override + public PositionedByteRange set(byte[] bytes) { + this.position = 0; + super.set(bytes); + return this; + } + + @Override + public PositionedByteRange set(byte[] bytes, int offset, int length) { + this.position = 0; + super.set(bytes, offset, length); + return this; + } + + /** + * Update the beginning of this range. {@code offset + length} may not be + * greater than {@code bytes.length}. Resets {@code position} to 0. + * @param offset the new start of this range. + * @return this. + */ + @Override + public PositionedByteRange setOffset(int offset) { + this.position = 0; + super.setOffset(offset); + return this; + } + + /** + * Update the length of this range. 
{@code offset + length} should not be + * greater than {@code bytes.length}. If {@code position} is greater than + * the new {@code length}, sets {@code position} to {@code length}. + * @param length The new length of this range. + * @return this. + */ + @Override + public PositionedByteRange setLength(int length) { + this.position = Math.min(position, length); + return (PositionedByteRange) super.setLength(length); // super returns this + } + + /** + * The current {@code position} marker. This value is 0-indexed, relative to + * the beginning of the range. + */ + public int getPosition() { return position; } + + /** + * Update the {@code position} index. May not be greater than {@code length}. + * @param position the new position in this range. + */ + public void setPosition(int position) { this.position = position; } + + /** + * The number of bytes remaining between position and the end of the range. + */ + public int getRemaining() { return length - position; } + + /** + * Retrieve the next byte from this range. + */ + public byte get() { return get(position++); } + + /** + * Fill {@code dst} with bytes from the range, starting from {@code position}. + * This range's {@code position} is incremented by the length of {@code dst}, + * the number of bytes copied. + * @param dst the destination of the copy. + * @return this. + */ + public PositionedByteRange get(byte[] dst) { + if (0 == dst.length) return this; + return get(dst, 0, dst.length); + } + + /** + * Fill {@code dst} with bytes from the range, starting from the current + * {@code position}. {@code length} bytes are copied into {@code dst}, + * starting at {@code offset}. This range's {@code position} is incremented + * by the number of bytes copied. + * @param dst the destination of the copy. + * @param offset the offset into {@code dst} to start the copy. + * @param length the number of bytes to copy into {@code dst}. + * @return this.
+ */ + public PositionedByteRange get(byte[] dst, int offset, int length) { + if (0 == length) return this; + System.arraycopy(this.bytes, this.offset + this.position, dst, offset, length); + this.position += length; + return this; + } + + /** + * Store {@code val} at the next position in this range. + * @param val the new value. + * @return this. + */ + public PositionedByteRange put(byte val) { + put(position++, val); + return this; + } + + /** + * Store the content of {@code val} in this range, starting at the next position. + * @param val the new value. + * @return this. + */ + public PositionedByteRange put(byte[] val) { + if (0 == val.length) return this; + return put(val, 0, val.length); + } + + /** + * Store {@code length} bytes from {@code val} into this range. Bytes from + * {@code val} are copied starting at {@code offset} into the range, starting at + * the current position. + * @param val the new value. + * @param offset the offset in {@code val} from which to start copying. + * @param length the number of bytes to copy from {@code val}. + * @return this. + */ + public PositionedByteRange put(byte[] val, int offset, int length) { + if (0 == length) return this; + System.arraycopy(val, offset, this.bytes, this.offset + this.position, length); + this.position += length; + return this; + } + + /** + * Similar to {@link ByteBuffer#flip()}. Sets length to position, position + * to offset. + */ + @VisibleForTesting + PositionedByteRange flip() { + clearHashCache(); + length = position; + position = offset; + return this; + } + + /** + * Similar to {@link ByteBuffer#clear()}. Sets position to 0, length to + * capacity. 
+ */ + @VisibleForTesting + PositionedByteRange clear() { + clearHashCache(); + position = 0; + length = bytes.length - offset; + return this; + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java index 5b50cb8..4ecac50 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java @@ -17,24 +17,21 @@ */ package org.apache.hadoop.hbase.util; -import junit.framework.Assert; -import junit.framework.TestCase; - import org.apache.hadoop.hbase.SmallTests; +import org.junit.Assert; +import org.junit.Test; import org.junit.experimental.categories.Category; - @Category(SmallTests.class) -public class TestByteRange extends TestCase { +public class TestByteRange { + @Test public void testEmpty(){ Assert.assertTrue(ByteRange.isEmpty(null)); ByteRange r = new ByteRange(); Assert.assertTrue(ByteRange.isEmpty(r)); - Assert.assertFalse(ByteRange.notEmpty(r)); Assert.assertTrue(r.isEmpty()); - Assert.assertFalse(r.notEmpty()); - Assert.assertNotNull(r.getBytes());//should be empty byte[], but could change this behavior + r.set(new byte[0]); Assert.assertEquals(0, r.getBytes().length); Assert.assertEquals(0, r.getOffset()); Assert.assertEquals(0, r.getLength()); @@ -43,7 +40,8 @@ public class TestByteRange extends TestCase { Assert.assertEquals(0, r.hashCode()); } - public void testBasics(){ + @Test + public void testBasics() { ByteRange r = new ByteRange(new byte[]{1, 3, 2}); Assert.assertFalse(ByteRange.isEmpty(r)); Assert.assertNotNull(r.getBytes());//should be empty byte[], but could change this behavior @@ -70,6 +68,4 @@ public class TestByteRange extends TestCase { r.setLength(2);//verify we retained the 2nd byte, but dangerous in real code Assert.assertTrue(Bytes.equals(new byte[]{1, 3}, r.deepCopyToNewArray())); } - } - diff --git 
a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestPositionedByteRange.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestPositionedByteRange.java new file mode 100644 index 0000000..f39e0ae --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestPositionedByteRange.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hbase.util;
+
+import org.apache.hadoop.hbase.SmallTests;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Exercises the position-tracking put and get operations of
+ * {@link PositionedByteRange}.
+ */
+@Category(SmallTests.class)
+public class TestPositionedByteRange {
+  /**
+   * Writes and reads a three-byte range through both the single-byte and the
+   * multi-byte accessors, checking the reported position and the content of
+   * the backing array after each write phase.
+   */
+  @Test
+  public void testPosition() {
+    // The range is constructed over a 5-byte array with offset 1 and length 3;
+    // the assertions below expect the bytes at indices 0 and 4 to stay 0.
+    PositionedByteRange r = new PositionedByteRange(new byte[5], 1, 3);
+    byte[] foo = Bytes.toBytes("foo");
+    byte[] bar = Bytes.toBytes("bar");
+
+    // exercise single-byte put
+    r.put(foo[0])
+     .put(foo[1])
+     .put(foo[2]);
+    Assert.assertEquals(3, r.getPosition());
+    Assert.assertArrayEquals(
+      new byte[] { 0, foo[0], foo[1], foo[2], 0 },
+      r.getBytes());
+
+    // exercise multi-byte put. Different content than the single-byte phase is
+    // written on purpose: re-writing "foo" would leave the array unchanged and
+    // the assertion would pass even if nothing had been stored.
+    r.setPosition(0);
+    r.put(Bytes.toBytes("b"))
+     .put(Bytes.toBytes("a"))
+     .put(Bytes.toBytes("r"));
+    Assert.assertEquals(3, r.getPosition());
+    Assert.assertArrayEquals(
+      new byte[] { 0, bar[0], bar[1], bar[2], 0 },
+      r.getBytes());
+
+    // exercise single-byte get
+    r.setPosition(0);
+    Assert.assertEquals(bar[0], r.get());
+    Assert.assertEquals(bar[1], r.get());
+    Assert.assertEquals(bar[2], r.get());
+
+    // "bar" has three distinct bytes, so this pins the read to position 1.
+    r.setPosition(1);
+    Assert.assertEquals(bar[1], r.get());
+
+    // exercise multi-byte get
+    r.setPosition(0);
+    byte[] dst = new byte[3];
+    r.get(dst);
+    Assert.assertArrayEquals(bar, dst);
+
+    // set position to the end of the range; this should not throw.
+    r.setPosition(3);
+  }
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java index 29ebafa..d253e39 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java @@ -28,7 +28,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.ByteRangeUtils; import org.apache.hadoop.hbase.util.CollectionUtils; import org.apache.hadoop.hbase.util.vint.UFIntTool; import org.apache.hadoop.hbase.util.vint.UVIntTool; @@ -155,7 +155,7 @@ public class RowNodeWriter{ protected void writeRowToken(OutputStream os) throws IOException { UVIntTool.writeBytes(tokenWidth, os); int tokenStartIndex = tokenizerNode.isRoot() ? 
0 : 1; - ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex); + ByteRangeUtils.write(os, tokenizerNode.getToken(), tokenStartIndex); } /** diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java index 077b5f5..dd6a3e1 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java @@ -164,7 +164,7 @@ public class TokenizerNode{ parent = null; nodeDepth = 0; tokenStartOffset = 0; - token.clear(); + token.unset(); numOccurrences = 0; children.clear();// branches & nubs diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java index f1d0456..71b5b1c 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java @@ -29,7 +29,7 @@ import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWrite import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.ByteRangeUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; import org.junit.Assert; @@ -67,7 +67,7 @@ public class TestColumnBuilder { List inputs = columns.getInputs(); this.columnSorter = new ByteRangeTreeSet(inputs); 
this.sortedUniqueColumns = columnSorter.compile().getSortedRanges(); - List copies = ByteRangeTool.copyToNewArrays(sortedUniqueColumns); + List copies = ByteRangeUtils.copyToNewArrays(sortedUniqueColumns); Assert.assertTrue(Bytes.isSorted(copies)); this.blockMeta = new PrefixTreeBlockMeta(); this.blockMeta.setNumMetaBytes(0); diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java index 5921116..bdb77ea 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java @@ -22,7 +22,7 @@ import java.util.List; import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData; import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.ByteRangeUtils; import org.apache.hadoop.hbase.util.Bytes; import com.google.common.collect.Lists; @@ -37,7 +37,7 @@ public class TestColumnDataSimple implements TestColumnData { d.add("abc"); d.add("bbc"); d.add("abc"); - return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); + return ByteRangeUtils.fromArrays(Bytes.getUtf8ByteArrays(d)); } @Override @@ -46,7 +46,7 @@ public class TestColumnDataSimple implements TestColumnData { d.add("abc"); d.add("abcde"); d.add("bbc"); - return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); + return ByteRangeUtils.fromArrays(Bytes.getUtf8ByteArrays(d)); } } -- 1.8.3.2