diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
index 676ffc3..4db0b7f 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
@@ -41,7 +41,7 @@ public enum DataBlockEncoding {
FAST_DIFF(4, "org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"),
// id 5 is reserved for the COPY_KEY algorithm for benchmarking
// COPY_KEY(5, "org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder"),
- PREFIX_TREE(6, "org.apache.hbase.codec.prefixtree.PrefixTreeCodec");
+ PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec");
private final short id;
private final byte[] idInBytes;
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java
new file mode 100644
index 0000000..0164306
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java
@@ -0,0 +1,841 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.vint.UVIntTool;
+import org.apache.hadoop.hbase.util.vint.UVLongTool;
+
+/**
+ * Information about the block. Stored at the beginning of the byte[]. Contains things
+ * like minimum timestamp and width of FInts in the row tree.
+ *
+ * Most fields are stored as VInts that are decoded on the first access of each new block.
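+ *
+ * A rough round-trip sketch (illustrative only; the encoder populates the fields, and the
+ * streams are whatever the caller provides):
+ * <pre>
+ *   meta.writeVariableBytesToOutputStream(out);                // persisted at the head of the block
+ *   ...
+ *   PrefixTreeBlockMeta decoded = new PrefixTreeBlockMeta(in); // decoded on first access of the block
+ * </pre>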
+ */
+@InterfaceAudience.Private
+public class PrefixTreeBlockMeta {
+
+ /******************* static fields ********************/
+
+ public static final int VERSION = 0;
+
+ public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue
+
+ public static final int
+ NUM_LONGS = 2,
+ NUM_INTS = 22,
+ NUM_SHORTS = 0,//keyValueTypeWidth not persisted
+ NUM_SINGLE_BYTES = 2,
+ MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS
+ + Bytes.SIZEOF_SHORT * NUM_SHORTS
+ + Bytes.SIZEOF_INT * NUM_INTS
+ + NUM_SINGLE_BYTES;
+
+
+ /**************** transient fields *********************/
+
+ protected int arrayOffset;
+ protected int bufferOffset;
+
+
+ /**************** persisted fields **********************/
+
+ // PrefixTree version to allow future format modifications
+ protected int version;
+ protected int numMetaBytes;
+ protected int numKeyValueBytes;
+ protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte
+
+ // split the byte[] into 6 sections for the different data types
+ protected int numRowBytes;
+ protected int numFamilyBytes;
+ protected int numQualifierBytes;
+ protected int numTimestampBytes;
+ protected int numMvccVersionBytes;
+ protected int numValueBytes;
+
+ // number of bytes in each section of fixed width FInts
+ protected int nextNodeOffsetWidth;
+ protected int familyOffsetWidth;
+ protected int qualifierOffsetWidth;
+ protected int timestampIndexWidth;
+ protected int mvccVersionIndexWidth;
+ protected int valueOffsetWidth;
+ protected int valueLengthWidth;
+
+ // used to pre-allocate structures for reading
+ protected int rowTreeDepth;
+ protected int maxRowLength;
+ protected int maxQualifierLength;
+
+ // the timestamp from which the deltas are calculated
+ protected long minTimestamp;
+ protected int timestampDeltaWidth;
+ protected long minMvccVersion;
+ protected int mvccVersionDeltaWidth;
+
+ protected boolean allSameType;
+ protected byte allTypes;
+
+ protected int numUniqueRows;
+ protected int numUniqueFamilies;
+ protected int numUniqueQualifiers;
+
+
+ /***************** constructors ********************/
+
+ public PrefixTreeBlockMeta() {
+ }
+
+ public PrefixTreeBlockMeta(InputStream is) throws IOException{
+ this.version = VERSION;
+ this.arrayOffset = 0;
+ this.bufferOffset = 0;
+ readVariableBytesFromInputStream(is);
+ }
+
+ /**
+ * @param buffer positioned at start of PtBlockMeta
+ */
+ public PrefixTreeBlockMeta(ByteBuffer buffer) {
+ initOnBlock(buffer);
+ }
+
+ public void initOnBlock(ByteBuffer buffer) {
+ arrayOffset = buffer.arrayOffset();
+ bufferOffset = buffer.position();
+ readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset);
+ }
+
+
+ /**************** operate on each field **********************/
+
+ public int calculateNumMetaBytes(){
+ int numBytes = 0;
+ numBytes += UVIntTool.numBytes(version);
+ numBytes += UVIntTool.numBytes(numMetaBytes);
+ numBytes += UVIntTool.numBytes(numKeyValueBytes);
+ ++numBytes;//os.write(getIncludesMvccVersionByte());
+
+ numBytes += UVIntTool.numBytes(numRowBytes);
+ numBytes += UVIntTool.numBytes(numFamilyBytes);
+ numBytes += UVIntTool.numBytes(numQualifierBytes);
+ numBytes += UVIntTool.numBytes(numTimestampBytes);
+ numBytes += UVIntTool.numBytes(numMvccVersionBytes);
+ numBytes += UVIntTool.numBytes(numValueBytes);
+
+ numBytes += UVIntTool.numBytes(nextNodeOffsetWidth);
+ numBytes += UVIntTool.numBytes(familyOffsetWidth);
+ numBytes += UVIntTool.numBytes(qualifierOffsetWidth);
+ numBytes += UVIntTool.numBytes(timestampIndexWidth);
+ numBytes += UVIntTool.numBytes(mvccVersionIndexWidth);
+ numBytes += UVIntTool.numBytes(valueOffsetWidth);
+ numBytes += UVIntTool.numBytes(valueLengthWidth);
+
+ numBytes += UVIntTool.numBytes(rowTreeDepth);
+ numBytes += UVIntTool.numBytes(maxRowLength);
+ numBytes += UVIntTool.numBytes(maxQualifierLength);
+
+ numBytes += UVLongTool.numBytes(minTimestamp);
+ numBytes += UVIntTool.numBytes(timestampDeltaWidth);
+ numBytes += UVLongTool.numBytes(minMvccVersion);
+ numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth);
+ ++numBytes;//os.write(getAllSameTypeByte());
+ ++numBytes;//os.write(allTypes);
+
+ numBytes += UVIntTool.numBytes(numUniqueRows);
+ numBytes += UVIntTool.numBytes(numUniqueFamilies);
+ numBytes += UVIntTool.numBytes(numUniqueQualifiers);
+ return numBytes;
+ }
+
+ public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{
+ UVIntTool.writeBytes(version, os);
+ UVIntTool.writeBytes(numMetaBytes, os);
+ UVIntTool.writeBytes(numKeyValueBytes, os);
+ os.write(getIncludesMvccVersionByte());
+
+ UVIntTool.writeBytes(numRowBytes, os);
+ UVIntTool.writeBytes(numFamilyBytes, os);
+ UVIntTool.writeBytes(numQualifierBytes, os);
+ UVIntTool.writeBytes(numTimestampBytes, os);
+ UVIntTool.writeBytes(numMvccVersionBytes, os);
+ UVIntTool.writeBytes(numValueBytes, os);
+
+ UVIntTool.writeBytes(nextNodeOffsetWidth, os);
+ UVIntTool.writeBytes(familyOffsetWidth, os);
+ UVIntTool.writeBytes(qualifierOffsetWidth, os);
+ UVIntTool.writeBytes(timestampIndexWidth, os);
+ UVIntTool.writeBytes(mvccVersionIndexWidth, os);
+ UVIntTool.writeBytes(valueOffsetWidth, os);
+ UVIntTool.writeBytes(valueLengthWidth, os);
+
+ UVIntTool.writeBytes(rowTreeDepth, os);
+ UVIntTool.writeBytes(maxRowLength, os);
+ UVIntTool.writeBytes(maxQualifierLength, os);
+
+ UVLongTool.writeBytes(minTimestamp, os);
+ UVIntTool.writeBytes(timestampDeltaWidth, os);
+ UVLongTool.writeBytes(minMvccVersion, os);
+ UVIntTool.writeBytes(mvccVersionDeltaWidth, os);
+ os.write(getAllSameTypeByte());
+ os.write(allTypes);
+
+ UVIntTool.writeBytes(numUniqueRows, os);
+ UVIntTool.writeBytes(numUniqueFamilies, os);
+ UVIntTool.writeBytes(numUniqueQualifiers, os);
+ }
+
+ public void readVariableBytesFromInputStream(InputStream is) throws IOException{
+ version = UVIntTool.getInt(is);
+ numMetaBytes = UVIntTool.getInt(is);
+ numKeyValueBytes = UVIntTool.getInt(is);
+ setIncludesMvccVersion((byte) is.read());
+
+ numRowBytes = UVIntTool.getInt(is);
+ numFamilyBytes = UVIntTool.getInt(is);
+ numQualifierBytes = UVIntTool.getInt(is);
+ numTimestampBytes = UVIntTool.getInt(is);
+ numMvccVersionBytes = UVIntTool.getInt(is);
+ numValueBytes = UVIntTool.getInt(is);
+
+ nextNodeOffsetWidth = UVIntTool.getInt(is);
+ familyOffsetWidth = UVIntTool.getInt(is);
+ qualifierOffsetWidth = UVIntTool.getInt(is);
+ timestampIndexWidth = UVIntTool.getInt(is);
+ mvccVersionIndexWidth = UVIntTool.getInt(is);
+ valueOffsetWidth = UVIntTool.getInt(is);
+ valueLengthWidth = UVIntTool.getInt(is);
+
+ rowTreeDepth = UVIntTool.getInt(is);
+ maxRowLength = UVIntTool.getInt(is);
+ maxQualifierLength = UVIntTool.getInt(is);
+
+ minTimestamp = UVLongTool.getLong(is);
+ timestampDeltaWidth = UVIntTool.getInt(is);
+ minMvccVersion = UVLongTool.getLong(is);
+ mvccVersionDeltaWidth = UVIntTool.getInt(is);
+
+ setAllSameType((byte) is.read());
+ allTypes = (byte) is.read();
+
+ numUniqueRows = UVIntTool.getInt(is);
+ numUniqueFamilies = UVIntTool.getInt(is);
+ numUniqueQualifiers = UVIntTool.getInt(is);
+ }
+
+ public void readVariableBytesFromArray(byte[] bytes, int offset) {
+ int position = offset;
+
+ version = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(version);
+ numMetaBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numMetaBytes);
+ numKeyValueBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numKeyValueBytes);
+ setIncludesMvccVersion(bytes[position]);
+ ++position;
+
+ numRowBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numRowBytes);
+ numFamilyBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numFamilyBytes);
+ numQualifierBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numQualifierBytes);
+ numTimestampBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numTimestampBytes);
+ numMvccVersionBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numMvccVersionBytes);
+ numValueBytes = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numValueBytes);
+
+ nextNodeOffsetWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(nextNodeOffsetWidth);
+ familyOffsetWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(familyOffsetWidth);
+ qualifierOffsetWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(qualifierOffsetWidth);
+ timestampIndexWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(timestampIndexWidth);
+ mvccVersionIndexWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(mvccVersionIndexWidth);
+ valueOffsetWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(valueOffsetWidth);
+ valueLengthWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(valueLengthWidth);
+
+ rowTreeDepth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(rowTreeDepth);
+ maxRowLength = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(maxRowLength);
+ maxQualifierLength = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(maxQualifierLength);
+
+ minTimestamp = UVLongTool.getLong(bytes, position);
+ position += UVLongTool.numBytes(minTimestamp);
+ timestampDeltaWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(timestampDeltaWidth);
+ minMvccVersion = UVLongTool.getLong(bytes, position);
+ position += UVLongTool.numBytes(minMvccVersion);
+ mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(mvccVersionDeltaWidth);
+
+ setAllSameType(bytes[position]);
+ ++position;
+ allTypes = bytes[position];
+ ++position;
+
+ numUniqueRows = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numUniqueRows);
+ numUniqueFamilies = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numUniqueFamilies);
+ numUniqueQualifiers = UVIntTool.getInt(bytes, position);
+ position += UVIntTool.numBytes(numUniqueQualifiers);
+ }
+
+ //TODO method that can read directly from ByteBuffer instead of InputStream
+
+
+ /*************** methods *************************/
+
+ public int getKeyValueTypeWidth() {
+ return allSameType ? 0 : 1;
+ }
+
+ public byte getIncludesMvccVersionByte() {
+ return includesMvccVersion ? (byte) 1 : (byte) 0;
+ }
+
+ public void setIncludesMvccVersion(byte includesMvccVersionByte) {
+ includesMvccVersion = includesMvccVersionByte != 0;
+ }
+
+ public byte getAllSameTypeByte() {
+ return allSameType ? (byte) 1 : (byte) 0;
+ }
+
+ public void setAllSameType(byte allSameTypeByte) {
+ allSameType = allSameTypeByte != 0;
+ }
+
+ public boolean isAllSameTimestamp() {
+ return timestampIndexWidth == 0;
+ }
+
+ public boolean isAllSameMvccVersion() {
+ return mvccVersionIndexWidth == 0;
+ }
+
+ public void setTimestampFields(LongEncoder encoder){
+ this.minTimestamp = encoder.getMin();
+ this.timestampIndexWidth = encoder.getBytesPerIndex();
+ this.timestampDeltaWidth = encoder.getBytesPerDelta();
+ this.numTimestampBytes = encoder.getTotalCompressedBytes();
+ }
+
+ public void setMvccVersionFields(LongEncoder encoder){
+ this.minMvccVersion = encoder.getMin();
+ this.mvccVersionIndexWidth = encoder.getBytesPerIndex();
+ this.mvccVersionDeltaWidth = encoder.getBytesPerDelta();
+ this.numMvccVersionBytes = encoder.getTotalCompressedBytes();
+ }
+
+
+ /*************** Object methods *************************/
+
+ /**
+ * Generated by Eclipse
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj;
+ if (allSameType != other.allSameType)
+ return false;
+ if (allTypes != other.allTypes)
+ return false;
+ if (arrayOffset != other.arrayOffset)
+ return false;
+ if (bufferOffset != other.bufferOffset)
+ return false;
+ if (valueLengthWidth != other.valueLengthWidth)
+ return false;
+ if (valueOffsetWidth != other.valueOffsetWidth)
+ return false;
+ if (familyOffsetWidth != other.familyOffsetWidth)
+ return false;
+ if (includesMvccVersion != other.includesMvccVersion)
+ return false;
+ if (maxQualifierLength != other.maxQualifierLength)
+ return false;
+ if (maxRowLength != other.maxRowLength)
+ return false;
+ if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth)
+ return false;
+ if (mvccVersionIndexWidth != other.mvccVersionIndexWidth)
+ return false;
+ if (minMvccVersion != other.minMvccVersion)
+ return false;
+ if (minTimestamp != other.minTimestamp)
+ return false;
+ if (nextNodeOffsetWidth != other.nextNodeOffsetWidth)
+ return false;
+ if (numValueBytes != other.numValueBytes)
+ return false;
+ if (numFamilyBytes != other.numFamilyBytes)
+ return false;
+ if (numMvccVersionBytes != other.numMvccVersionBytes)
+ return false;
+ if (numMetaBytes != other.numMetaBytes)
+ return false;
+ if (numQualifierBytes != other.numQualifierBytes)
+ return false;
+ if (numRowBytes != other.numRowBytes)
+ return false;
+ if (numTimestampBytes != other.numTimestampBytes)
+ return false;
+ if (numUniqueFamilies != other.numUniqueFamilies)
+ return false;
+ if (numUniqueQualifiers != other.numUniqueQualifiers)
+ return false;
+ if (numUniqueRows != other.numUniqueRows)
+ return false;
+ if (numKeyValueBytes != other.numKeyValueBytes)
+ return false;
+ if (qualifierOffsetWidth != other.qualifierOffsetWidth)
+ return false;
+ if (rowTreeDepth != other.rowTreeDepth)
+ return false;
+ if (timestampDeltaWidth != other.timestampDeltaWidth)
+ return false;
+ if (timestampIndexWidth != other.timestampIndexWidth)
+ return false;
+ if (version != other.version)
+ return false;
+ return true;
+ }
+
+ /**
+ * Generated by Eclipse
+ */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (allSameType ? 1231 : 1237);
+ result = prime * result + allTypes;
+ result = prime * result + arrayOffset;
+ result = prime * result + bufferOffset;
+ result = prime * result + valueLengthWidth;
+ result = prime * result + valueOffsetWidth;
+ result = prime * result + familyOffsetWidth;
+ result = prime * result + (includesMvccVersion ? 1231 : 1237);
+ result = prime * result + maxQualifierLength;
+ result = prime * result + maxRowLength;
+ result = prime * result + mvccVersionDeltaWidth;
+ result = prime * result + mvccVersionIndexWidth;
+ result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32));
+ result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32));
+ result = prime * result + nextNodeOffsetWidth;
+ result = prime * result + numValueBytes;
+ result = prime * result + numFamilyBytes;
+ result = prime * result + numMvccVersionBytes;
+ result = prime * result + numMetaBytes;
+ result = prime * result + numQualifierBytes;
+ result = prime * result + numRowBytes;
+ result = prime * result + numTimestampBytes;
+ result = prime * result + numUniqueFamilies;
+ result = prime * result + numUniqueQualifiers;
+ result = prime * result + numUniqueRows;
+ result = prime * result + numKeyValueBytes;
+ result = prime * result + qualifierOffsetWidth;
+ result = prime * result + rowTreeDepth;
+ result = prime * result + timestampDeltaWidth;
+ result = prime * result + timestampIndexWidth;
+ result = prime * result + version;
+ return result;
+ }
+
+ /**
+ * Generated by Eclipse
+ */
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("PtBlockMeta [arrayOffset=");
+ builder.append(arrayOffset);
+ builder.append(", bufferOffset=");
+ builder.append(bufferOffset);
+ builder.append(", version=");
+ builder.append(version);
+ builder.append(", numMetaBytes=");
+ builder.append(numMetaBytes);
+ builder.append(", numKeyValueBytes=");
+ builder.append(numKeyValueBytes);
+ builder.append(", includesMvccVersion=");
+ builder.append(includesMvccVersion);
+ builder.append(", numRowBytes=");
+ builder.append(numRowBytes);
+ builder.append(", numFamilyBytes=");
+ builder.append(numFamilyBytes);
+ builder.append(", numQualifierBytes=");
+ builder.append(numQualifierBytes);
+ builder.append(", numTimestampBytes=");
+ builder.append(numTimestampBytes);
+ builder.append(", numMvccVersionBytes=");
+ builder.append(numMvccVersionBytes);
+ builder.append(", numValueBytes=");
+ builder.append(numValueBytes);
+ builder.append(", nextNodeOffsetWidth=");
+ builder.append(nextNodeOffsetWidth);
+ builder.append(", familyOffsetWidth=");
+ builder.append(familyOffsetWidth);
+ builder.append(", qualifierOffsetWidth=");
+ builder.append(qualifierOffsetWidth);
+ builder.append(", timestampIndexWidth=");
+ builder.append(timestampIndexWidth);
+ builder.append(", mvccVersionIndexWidth=");
+ builder.append(mvccVersionIndexWidth);
+ builder.append(", valueOffsetWidth=");
+ builder.append(valueOffsetWidth);
+ builder.append(", valueLengthWidth=");
+ builder.append(valueLengthWidth);
+ builder.append(", rowTreeDepth=");
+ builder.append(rowTreeDepth);
+ builder.append(", maxRowLength=");
+ builder.append(maxRowLength);
+ builder.append(", maxQualifierLength=");
+ builder.append(maxQualifierLength);
+ builder.append(", minTimestamp=");
+ builder.append(minTimestamp);
+ builder.append(", timestampDeltaWidth=");
+ builder.append(timestampDeltaWidth);
+ builder.append(", minMvccVersion=");
+ builder.append(minMvccVersion);
+ builder.append(", mvccVersionDeltaWidth=");
+ builder.append(mvccVersionDeltaWidth);
+ builder.append(", allSameType=");
+ builder.append(allSameType);
+ builder.append(", allTypes=");
+ builder.append(allTypes);
+ builder.append(", numUniqueRows=");
+ builder.append(numUniqueRows);
+ builder.append(", numUniqueFamilies=");
+ builder.append(numUniqueFamilies);
+ builder.append(", numUniqueQualifiers=");
+ builder.append(numUniqueQualifiers);
+ builder.append("]");
+ return builder.toString();
+ }
+
+
+ /************** absolute getters *******************/
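+ // Section layout within the block: each absolute offset below is the previous offset plus that
+ // section's persisted byte count, i.e.
+ //   [meta][row][family][qualifier][timestamp][mvccVersion][value]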
+
+ public int getAbsoluteMetaOffset() {
+ return arrayOffset + bufferOffset;
+ }
+
+ public int getAbsoluteRowOffset() {
+ return getAbsoluteMetaOffset() + numMetaBytes;
+ }
+
+ public int getAbsoluteFamilyOffset() {
+ return getAbsoluteRowOffset() + numRowBytes;
+ }
+
+ public int getAbsoluteQualifierOffset() {
+ return getAbsoluteFamilyOffset() + numFamilyBytes;
+ }
+
+ public int getAbsoluteTimestampOffset() {
+ return getAbsoluteQualifierOffset() + numQualifierBytes;
+ }
+
+ public int getAbsoluteMvccVersionOffset() {
+ return getAbsoluteTimestampOffset() + numTimestampBytes;
+ }
+
+ public int getAbsoluteValueOffset() {
+ return getAbsoluteMvccVersionOffset() + numMvccVersionBytes;
+ }
+
+
+ /*************** get/set ***************************/
+
+ public int getTimestampDeltaWidth() {
+ return timestampDeltaWidth;
+ }
+
+ public void setTimestampDeltaWidth(int timestampDeltaWidth) {
+ this.timestampDeltaWidth = timestampDeltaWidth;
+ }
+
+ public int getValueOffsetWidth() {
+ return valueOffsetWidth;
+ }
+
+ public void setValueOffsetWidth(int dataOffsetWidth) {
+ this.valueOffsetWidth = dataOffsetWidth;
+ }
+
+ public int getValueLengthWidth() {
+ return valueLengthWidth;
+ }
+
+ public void setValueLengthWidth(int dataLengthWidth) {
+ this.valueLengthWidth = dataLengthWidth;
+ }
+
+ public int getMaxRowLength() {
+ return maxRowLength;
+ }
+
+ public void setMaxRowLength(int maxRowLength) {
+ this.maxRowLength = maxRowLength;
+ }
+
+ public long getMinTimestamp() {
+ return minTimestamp;
+ }
+
+ public void setMinTimestamp(long minTimestamp) {
+ this.minTimestamp = minTimestamp;
+ }
+
+ public byte getAllTypes() {
+ return allTypes;
+ }
+
+ public void setAllTypes(byte allTypes) {
+ this.allTypes = allTypes;
+ }
+
+ public boolean isAllSameType() {
+ return allSameType;
+ }
+
+ public void setAllSameType(boolean allSameType) {
+ this.allSameType = allSameType;
+ }
+
+ public int getNextNodeOffsetWidth() {
+ return nextNodeOffsetWidth;
+ }
+
+ public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) {
+ this.nextNodeOffsetWidth = nextNodeOffsetWidth;
+ }
+
+ public int getNumRowBytes() {
+ return numRowBytes;
+ }
+
+ public void setNumRowBytes(int numRowBytes) {
+ this.numRowBytes = numRowBytes;
+ }
+
+ public int getNumTimestampBytes() {
+ return numTimestampBytes;
+ }
+
+ public void setNumTimestampBytes(int numTimestampBytes) {
+ this.numTimestampBytes = numTimestampBytes;
+ }
+
+ public int getNumValueBytes() {
+ return numValueBytes;
+ }
+
+ public void setNumValueBytes(int numValueBytes) {
+ this.numValueBytes = numValueBytes;
+ }
+
+ public int getNumMetaBytes() {
+ return numMetaBytes;
+ }
+
+ public void setNumMetaBytes(int numMetaBytes) {
+ this.numMetaBytes = numMetaBytes;
+ }
+
+ public int getArrayOffset() {
+ return arrayOffset;
+ }
+
+ public void setArrayOffset(int arrayOffset) {
+ this.arrayOffset = arrayOffset;
+ }
+
+ public int getBufferOffset() {
+ return bufferOffset;
+ }
+
+ public void setBufferOffset(int bufferOffset) {
+ this.bufferOffset = bufferOffset;
+ }
+
+ public int getNumKeyValueBytes() {
+ return numKeyValueBytes;
+ }
+
+ public void setNumKeyValueBytes(int numKeyValueBytes) {
+ this.numKeyValueBytes = numKeyValueBytes;
+ }
+
+ public int getRowTreeDepth() {
+ return rowTreeDepth;
+ }
+
+ public void setRowTreeDepth(int rowTreeDepth) {
+ this.rowTreeDepth = rowTreeDepth;
+ }
+
+ public int getNumMvccVersionBytes() {
+ return numMvccVersionBytes;
+ }
+
+ public void setNumMvccVersionBytes(int numMvccVersionBytes) {
+ this.numMvccVersionBytes = numMvccVersionBytes;
+ }
+
+ public int getMvccVersionDeltaWidth() {
+ return mvccVersionDeltaWidth;
+ }
+
+ public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) {
+ this.mvccVersionDeltaWidth = mvccVersionDeltaWidth;
+ }
+
+ public long getMinMvccVersion() {
+ return minMvccVersion;
+ }
+
+ public void setMinMvccVersion(long minMvccVersion) {
+ this.minMvccVersion = minMvccVersion;
+ }
+
+ public int getNumFamilyBytes() {
+ return numFamilyBytes;
+ }
+
+ public void setNumFamilyBytes(int numFamilyBytes) {
+ this.numFamilyBytes = numFamilyBytes;
+ }
+
+ public int getFamilyOffsetWidth() {
+ return familyOffsetWidth;
+ }
+
+ public void setFamilyOffsetWidth(int familyOffsetWidth) {
+ this.familyOffsetWidth = familyOffsetWidth;
+ }
+
+ public int getNumUniqueRows() {
+ return numUniqueRows;
+ }
+
+ public void setNumUniqueRows(int numUniqueRows) {
+ this.numUniqueRows = numUniqueRows;
+ }
+
+ public int getNumUniqueFamilies() {
+ return numUniqueFamilies;
+ }
+
+ public void setNumUniqueFamilies(int numUniqueFamilies) {
+ this.numUniqueFamilies = numUniqueFamilies;
+ }
+
+ public int getNumUniqueQualifiers() {
+ return numUniqueQualifiers;
+ }
+
+ public void setNumUniqueQualifiers(int numUniqueQualifiers) {
+ this.numUniqueQualifiers = numUniqueQualifiers;
+ }
+
+ public int getNumQualifierBytes() {
+ return numQualifierBytes;
+ }
+
+ public void setNumQualifierBytes(int numQualifierBytes) {
+ this.numQualifierBytes = numQualifierBytes;
+ }
+
+ public int getQualifierOffsetWidth() {
+ return qualifierOffsetWidth;
+ }
+
+ public void setQualifierOffsetWidth(int qualifierOffsetWidth) {
+ this.qualifierOffsetWidth = qualifierOffsetWidth;
+ }
+
+ public int getMaxQualifierLength() {
+ return maxQualifierLength;
+ }
+
+ public void setMaxQualifierLength(int maxQualifierLength) {
+ this.maxQualifierLength = maxQualifierLength;
+ }
+
+ public int getTimestampIndexWidth() {
+ return timestampIndexWidth;
+ }
+
+ public void setTimestampIndexWidth(int timestampIndexWidth) {
+ this.timestampIndexWidth = timestampIndexWidth;
+ }
+
+ public int getMvccVersionIndexWidth() {
+ return mvccVersionIndexWidth;
+ }
+
+ public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) {
+ this.mvccVersionIndexWidth = mvccVersionIndexWidth;
+ }
+
+ public int getVersion() {
+ return version;
+ }
+
+ public void setVersion(int version) {
+ this.version = version;
+ }
+
+ public boolean isIncludesMvccVersion() {
+ return includesMvccVersion;
+ }
+
+ public void setIncludesMvccVersion(boolean includesMvccVersion) {
+ this.includesMvccVersion = includesMvccVersion;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
new file mode 100644
index 0000000..2aa5a2b
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.KeyComparator;
+import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator;
+import org.apache.hadoop.hbase.KeyValue.RootKeyComparator;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
+import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
+import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
+import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
+import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
+import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.util.ByteBufferUtils;
+import org.apache.hadoop.io.RawComparator;
+
+/**
+ * This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or
+ * package changes.
+ *
+ * This is the PrefixTree implementation of DataBlockEncoder and the primary entry point for
+ * PrefixTree encoding and decoding. Encoding is delegated to instances of
+ * {@link PrefixTreeEncoder}, and decoding is delegated to instances of
+ * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder
+ * instances are created and recycled by the static EncoderFactory and DecoderFactory.
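+ *
+ * A hedged usage sketch (setup simplified; rawKeyValues, includesMvccVersion and dummyHeader are
+ * placeholders supplied by the caller):
+ * <pre>
+ *   DataBlockEncoder codec = new PrefixTreeCodec();
+ *   HFileBlockEncodingContext ctx = codec.newDataBlockEncodingContext(
+ *       Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, dummyHeader);
+ *   codec.encodeKeyValues(rawKeyValues, includesMvccVersion, ctx);
+ * </pre>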
+ */
+@InterfaceAudience.Private
+public class PrefixTreeCodec implements DataBlockEncoder{
+
+ /**
+ * no-arg constructor for reflection
+ */
+ public PrefixTreeCodec() {
+ }
+
+ /**
+ * Copied from BufferedDataBlockEncoder. Almost definitely can be improved, but I'm not familiar
+ * enough with the concept of the HFileBlockEncodingContext.
+ */
+ @Override
+ public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion,
+ HFileBlockEncodingContext blkEncodingCtx) throws IOException {
+ if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
+ throw new IOException(this.getClass().getName() + " only accepts "
+ + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context.");
+ }
+
+ HFileBlockDefaultEncodingContext encodingCtx
+ = (HFileBlockDefaultEncodingContext) blkEncodingCtx;
+ encodingCtx.prepareEncoding();
+ DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder();
+ internalEncodeKeyValues(dataOut, in, includesMvccVersion);
+
+ //do I need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
+ if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
+ encodingCtx.postEncoding(BlockType.ENCODED_DATA);
+ } else {
+ encodingCtx.postEncoding(BlockType.DATA);
+ }
+ }
+
+ private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
+ ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException {
+ rawKeyValues.rewind();
+ PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);
+
+ try{
+ KeyValue kv;
+ while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) {
+ builder.write(kv);
+ }
+ builder.flush();
+ }finally{
+ EncoderFactory.checkIn(builder);
+ }
+ }
+
+
+ @Override
+ public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion)
+ throws IOException {
+ return decodeKeyValues(source, 0, 0, includesMvccVersion);
+ }
+
+
+ /**
+ * I don't think this method is called during normal HBase operation, so efficiency is not
+ * important.
+ */
+ @Override
+ public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
+ int skipLastBytes, boolean includesMvccVersion) throws IOException {
+ ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// wasteful full copy, but this method is not performance-critical
+ sourceAsBuffer.mark();
+ PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer);
+ sourceAsBuffer.rewind();
+ int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes();
+ byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader];
+ ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader);
+ result.rewind();
+ CellSearcher searcher = null;
+ try {
+ searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion);
+ while (searcher.advance()) {
+ KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current());
+ // needs to be modified for DirectByteBuffers. no existing methods to
+ // write VLongs to byte[]
+ int offset = result.arrayOffset() + result.position();
+ KeyValueUtil.appendToByteArray(currentCell, result.array(), offset);
+ int keyValueLength = KeyValueUtil.length(currentCell);
+ ByteBufferUtils.skip(result, keyValueLength);
+ offset += keyValueLength;
+ if (includesMvccVersion) {
+ ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion());
+ }
+ }
+ result.position(result.limit());//make it appear as if we were appending
+ return result;
+ } finally {
+ DecoderFactory.checkIn(searcher);
+ }
+ }
+
+
+ @Override
+ public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
+ block.rewind();
+ PrefixTreeArraySearcher searcher = null;
+ try {
+ //should I includeMemstoreTS (second argument)? I think PrefixKeyDeltaEncoder does, so I will
+ searcher = DecoderFactory.checkOut(block, true);
+ if (!searcher.positionAtFirstCell()) {
+ return null;
+ }
+ return KeyValueUtil.copyKeyToNewByteBuffer(searcher.current());
+ } finally {
+ DecoderFactory.checkIn(searcher);
+ }
+ }
+
+ @Override
+ public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm,
+ DataBlockEncoding encoding, byte[] header) {
+ if(DataBlockEncoding.PREFIX_TREE != encoding){
+ //I'm not sure why encoding is in the interface. Each encoder implementation should probably
+ //know its own encoding type
+ throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
+ }
+ return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header);
+ }
+
+ @Override
+ public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) {
+ return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
+ }
+
+ /**
+ * Is this the correct handling of an illegal comparator? How can we prevent one from getting all
+ * the way to this point?
+ */
+ @Override
+ public EncodedSeeker createSeeker(RawComparator comparator, boolean includesMvccVersion) {
+ if(! (comparator instanceof KeyComparator)){
+ throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator");
+ }
+ if(comparator instanceof MetaKeyComparator){
+ throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META "
+ +"table");
+ }
+ if(comparator instanceof RootKeyComparator){
+ throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT "
+ +"table");
+ }
+
+ return new PrefixTreeSeeker(includesMvccVersion);
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java
new file mode 100644
index 0000000..a46a34a
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java
@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
+import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
+
+/**
+ * These methods have the same contract as any other implementation of EncodedSeeker.
+ *
+ * In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It
+ * currently returns a new KeyValue object each time getKeyValue is called. This is not horrible,
+ * but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in
+ * the data from the PrefixTreeCell. It is somewhat heavyweight right now.
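+ *
+ * A minimal iteration sketch (assumes the ByteBuffer holds an already-decompressed encoded block):
+ * <pre>
+ *   PrefixTreeSeeker seeker = new PrefixTreeSeeker(includesMvccVersion);
+ *   seeker.setCurrentBuffer(block);        // also positions the seeker at the first cell
+ *   do {
+ *     KeyValue kv = seeker.getKeyValue();  // deep copy of the current cell
+ *   } while (seeker.next());
+ * </pre>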
+ */
+@InterfaceAudience.Private
+public class PrefixTreeSeeker implements EncodedSeeker {
+
+ protected ByteBuffer block;
+ protected boolean includeMvccVersion;
+ protected PrefixTreeArraySearcher ptSearcher;
+
+ public PrefixTreeSeeker(boolean includeMvccVersion) {
+ this.includeMvccVersion = includeMvccVersion;
+ }
+
+ @Override
+ public void setCurrentBuffer(ByteBuffer fullBlockBuffer) {
+ block = fullBlockBuffer;
+ ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion);
+ rewind();
+ }
+
+ /**
+ * Currently unused.
+ *
+ * TODO performance leak: should reuse the searchers, but HBase does not currently have a hook
+ * where this can be called.
+ */
+ public void releaseCurrentSearcher(){
+ DecoderFactory.checkIn(ptSearcher);
+ }
+
+
+ @Override
+ public ByteBuffer getKeyDeepCopy() {
+ return KeyValueUtil.copyKeyToNewByteBuffer(ptSearcher.current());
+ }
+
+
+ @Override
+ public ByteBuffer getValueShallowCopy() {
+ return CellUtil.getValueBufferShallowCopy(ptSearcher.current());
+ }
+
+ /**
+ * currently must do deep copy into new array
+ */
+ @Override
+ public ByteBuffer getKeyValueBuffer() {
+ return KeyValueUtil.copyToNewByteBuffer(ptSearcher.current());
+ }
+
+ /**
+ * currently must do deep copy into new array
+ */
+ @Override
+ public KeyValue getKeyValue() {
+ return KeyValueUtil.copyToNewKeyValue(ptSearcher.current());
+ }
+
+ /**
+ * Currently unused.
+ *
+ * A nice, lightweight reference, though the underlying cell is transient. This method may return
+ * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may
+ * return a different reference for each Cell.
+ *
+ * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to
+ * use this method instead of the getKeyValue() methods above.
+ */
+ public Cell get() {
+ return ptSearcher.current();
+ }
+
+ @Override
+ public void rewind() {
+ ptSearcher.positionAtFirstCell();
+ }
+
+ @Override
+ public boolean next() {
+ return ptSearcher.advance();
+ }
+
+// @Override
+ public boolean advance() {
+ return ptSearcher.advance();
+ }
+
+
+ private static final boolean USE_POSITION_BEFORE = false;
+
+ /**
+ * Seek forward only (should be called reseekToKeyInBlock?).
+ *
+ * If the exact key is found, look at the forceBeforeOnExactMatch (seekBefore) flag:
+ * - if true: go to the previous key
+ * - if false: stay on the exact key
+ *
+ * If the exact key is not found, then go to the previous key *if possible*, but remember to
+ * leave the scanner in a valid state if possible.
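+ *
+ * Illustrative call (keyOnlyBytes is assumed to be a key in KeyValue key format):
+ * <pre>
+ *   int result = seeker.seekToKeyInBlock(keyOnlyBytes, 0, keyOnlyBytes.length, false);
+ *   // 0: positioned exactly on the key; 1: positioned at the closest earlier position possible
+ * </pre>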
+ *
+ * @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker
+ * @param offset offset into the keyOnlyBytes array
+ * @param length number of bytes of the keyOnlyBytes array to use
+ * @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up 1 Cell
+ * @return 0 if the seeker is on the exact key
+ * 1 if the seeker is not on the key for any reason, including seekBefore being true
+ */
+ @Override
+ public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length,
+ boolean forceBeforeOnExactMatch) {
+ if (USE_POSITION_BEFORE) {
+ return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length,
+ forceBeforeOnExactMatch);
+ }else{
+ return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length,
+ forceBeforeOnExactMatch);
+ }
+ }
+
+
+
+ /*
+ * Support both of these options since the underlying PrefixTree supports both. Possibly
+ * expand the EncodedSeeker to utilize them both.
+ */
+
+ protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length,
+ boolean forceBeforeOnExactMatch){
+ // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
+ KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);
+
+ CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv);
+
+ if(CellScannerPosition.AT == position){
+ if (forceBeforeOnExactMatch) {
+ ptSearcher.previous();
+ return 1;
+ }
+ return 0;
+ }
+
+ return 1;
+ }
+
+
+ protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length,
+ boolean forceBeforeOnExactMatch){
+ // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
+ KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);
+
+ //should probably switch this to use the seekForwardToOrBefore method
+ CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv);
+
+ if(CellScannerPosition.AT == position){
+ if (forceBeforeOnExactMatch) {
+ ptSearcher.previous();
+ return 1;
+ }
+ return 0;
+
+ }
+
+ if(CellScannerPosition.AFTER == position){
+ if(!ptSearcher.isBeforeFirst()){
+ ptSearcher.previous();
+ }
+ return 1;
+ }
+
+ if(position == CellScannerPosition.AFTER_LAST){
+ return 1;
+ }
+
+ throw new RuntimeException("unexpected CellScannerPosition:"+position);
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/ArraySearcherPool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/ArraySearcherPool.java
new file mode 100644
index 0000000..e1a92e0
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/ArraySearcherPool.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode;
+
+import java.nio.ByteBuffer;
+import java.util.Queue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Pools PrefixTreeArraySearcher objects. Each Searcher can consist of hundreds or thousands of
+ * objects and 1 is needed for each HFile during a Get operation. With tens of thousands of
+ * Gets/second, reusing these searchers may save a lot of young gen collections.
+ *
+ * Alternative implementation would be a ByteBufferSearcherPool (not implemented yet).
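+ *
+ * Typical check-out/check-in pattern (sketch; callers normally go through DecoderFactory rather
+ * than using the pool directly):
+ * <pre>
+ *   PrefixTreeArraySearcher searcher = pool.checkOut(blockBuffer, includesMvccVersion);
+ *   try {
+ *     // scan the block
+ *   } finally {
+ *     pool.checkIn(searcher);
+ *   }
+ * </pre>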
+ */
+@InterfaceAudience.Private
+public class ArraySearcherPool {
+
+ /**
+ * One decoder is needed for each storefile for each Get operation, so we may need hundreds at the
+ * same time. However, decoding is a CPU-bound activity, so the pool is capped at something in the
+ * realm of the maximum number of reasonably active threads.
+ */
+ private static final Integer MAX_POOL_SIZE = 1000;
+
+ protected Queue<PrefixTreeArraySearcher> pool
+ = new LinkedBlockingQueue<PrefixTreeArraySearcher>(MAX_POOL_SIZE);
+
+ public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) {
+ PrefixTreeArraySearcher searcher = pool.poll();//will return null if pool is empty
+ searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion);
+ return searcher;
+ }
+
+ public void checkIn(PrefixTreeArraySearcher searcher) {
+ searcher.releaseBlockReference();
+ pool.offer(searcher);
+ }
+
+ @Override
+ public String toString() {
+ return ("poolSize:" + pool.size());
+ }
+
+}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java
new file mode 100644
index 0000000..90c22b7
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
+
+/**
+ * Static wrapper class for the ArraySearcherPool.
+ */
+@InterfaceAudience.Private
+public class DecoderFactory {
+
+ private static final ArraySearcherPool POOL = new ArraySearcherPool();
+
+ //TODO will need a PrefixTreeSearcher on top of CellSearcher
+ public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer,
+ boolean includeMvccVersion) {
+ if (buffer.isDirect()) {
+ throw new IllegalArgumentException("DirectByteBuffers not supported yet");
+ // TODO implement PtByteBufferBlockScanner
+ }
+
+ PrefixTreeArraySearcher searcher = POOL.checkOut(buffer,
+ includeMvccVersion);
+ return searcher;
+ }
+
+ public static void checkIn(CellSearcher pSearcher) {
+ if (pSearcher == null) {
+ return;
+ }
+ if (! (pSearcher instanceof PrefixTreeArraySearcher)) {
+ throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to "
+ +DecoderFactory.class);
+ }
+ PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher;
+ POOL.checkIn(searcher);
+ }
+
+
+ /**************************** helper ******************************/
+
+ public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer,
+ PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
+ if (searcher == null) {
+ PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer);
+ searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(),
+ blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength());
+ searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
+ return searcher;
+ }
+
+ PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta();
+ blockMeta.initOnBlock(buffer);
+ if (!searcher.areBuffersBigEnough()) {
+ int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(),
+ searcher.getMaxRowTreeStackNodes());
+ int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
+ int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(),
+ searcher.getQualifierBufferLength());
+ searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
+ qualifierBufferLength);
+ }
+ //this is where we parse the BlockMeta
+ searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
+ return searcher;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java
new file mode 100644
index 0000000..5e1f48b
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.scanner.ReversibleCellScanner;
+
+/**
+ * Methods for going backwards through a PrefixTree block. This class is split out on its own to
+ * simplify the Scanner superclass and Searcher subclass.
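+ *
+ * Reverse iteration sketch (assumes the scanner is already positioned somewhere in the block):
+ * <pre>
+ *   while (scanner.previous()) {
+ *     Cell c = scanner.current();
+ *   }
+ * </pre>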
+ */
+@InterfaceAudience.Private
+public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements
+ ReversibleCellScanner {
+
+ /***************** construct ******************************/
+
+ public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
+ int rowBufferLength, int qualifierBufferLength) {
+ super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
+ }
+
+
+ /***************** Object methods ***************************/
+
+ @Override
+ public boolean equals(Object obj) {
+ //trivial override to confirm intent (findbugs)
+ return super.equals(obj);
+ }
+
+
+ /***************** methods **********************************/
+
+ @Override
+ public boolean previous() {
+ if (afterLast) {
+ afterLast = false;
+ positionAtLastCell();
+ return true;
+ }
+ if (beforeFirst) {
+ return false;
+ }
+ if (isFirstCellInRow()) {
+ previousRowInternal();
+ if (beforeFirst) {
+ return false;
+ }
+ populateLastNonRowFields();
+ return true;
+ }
+ populatePreviousNonRowFields();
+ return true;
+ }
+
+ @Override
+ public boolean previousRow(boolean endOfRow) {
+ previousRowInternal();
+ if(beforeFirst){
+ return false;
+ }
+ if(endOfRow){
+ populateLastNonRowFields();
+ }else{
+ populateFirstNonRowFields();
+ }
+ return true;
+ }
+
+ private boolean previousRowInternal() {
+ if (beforeFirst) {
+ return false;
+ }
+ if (afterLast) {
+ positionAtLastRow();
+ return true;
+ }
+ if (currentRowNode.hasOccurrences()) {
+ discardCurrentRowNode(false);
+ if(currentRowNode==null){
+ return false;
+ }
+ }
+ while (!beforeFirst) {
+ if (isDirectlyAfterNub()) {//we are about to back up to the nub
+ currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf
+ nubCellsRemain = true;//this positions us on the nub
+ return true;
+ }
+ if (currentRowNode.hasPreviousFanNodes()) {
+ followPreviousFan();
+ descendToLastRowFromCurrentPosition();
+ } else {// keep going up the stack until we find previous fan positions
+ discardCurrentRowNode(false);
+ if(currentRowNode==null){
+ return false;
+ }
+ }
+ if (currentRowNode.hasOccurrences()) {// escape clause
+ return true;// found some values
+ }
+ }
+ return false;// went past the beginning
+ }
+
+ protected boolean isDirectlyAfterNub() {
+ return currentRowNode.isNub() && currentRowNode.getFanIndex()==0;
+ }
+
+ protected void positionAtLastRow() {
+ reInitFirstNode();
+ descendToLastRowFromCurrentPosition();
+ }
+
+ protected void descendToLastRowFromCurrentPosition() {
+ while (currentRowNode.hasChildren()) {
+ followLastFan();
+ }
+ }
+
+ protected void positionAtLastCell() {
+ positionAtLastRow();
+ populateLastNonRowFields();
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java
new file mode 100644
index 0000000..6cb670f
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java
@@ -0,0 +1,506 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellScanner;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.row.RowNodeReader;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
+import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
+
+/**
+ * Extends PrefixTreeCell and manipulates its protected fields. Could alternatively contain a
+ * PrefixTreeCell and call get/set methods.
+ *
+ * This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This
+ * implementation requires that the bytes be in a normal java byte[] for performance. The
+ * alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer
+ * without copying the whole buffer on-heap.
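+ *
+ * Forward scan sketch (initOnBlock resets the scanner to before the first cell; the
+ * advance/current loop mirrors how PrefixTreeCodec drains a block):
+ * <pre>
+ *   scanner.initOnBlock(blockMeta, blockBytes, includeMvccVersion);
+ *   while (scanner.advance()) {
+ *     Cell c = scanner.current();
+ *   }
+ * </pre>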
+ */
+@InterfaceAudience.Private
+public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner {
+
+ /***************** fields ********************************/
+
+ protected PrefixTreeBlockMeta blockMeta;
+
+ protected boolean beforeFirst;
+ protected boolean afterLast;
+
+ protected RowNodeReader[] rowNodes;
+ protected int rowNodeStackIndex;
+
+ protected RowNodeReader currentRowNode;
+ protected ColumnReader familyReader;
+ protected ColumnReader qualifierReader;
+ protected TimestampDecoder timestampDecoder;
+ protected MvccVersionDecoder mvccVersionDecoder;
+
+ protected boolean nubCellsRemain;
+ protected int currentCellIndex;
+
+
+ /*********************** construct ******************************/
+
+ // pass in blockMeta so we can initialize buffers big enough for all cells in the block
+ public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
+ int rowBufferLength, int qualifierBufferLength) {
+ this.rowNodes = new RowNodeReader[rowTreeDepth];
+ for (int i = 0; i < rowNodes.length; ++i) {
+ rowNodes[i] = new RowNodeReader();
+ }
+ this.rowBuffer = new byte[rowBufferLength];
+ this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
+ this.familyReader = new ColumnReader(familyBuffer, true);
+ this.qualifierBuffer = new byte[qualifierBufferLength];
+ this.qualifierReader = new ColumnReader(qualifierBuffer, false);
+ this.timestampDecoder = new TimestampDecoder();
+ this.mvccVersionDecoder = new MvccVersionDecoder();
+ }
+
+
+ /**************** init helpers ***************************************/
+
+ /**
+ * Call when first accessing a block.
+ * @return false if the caller should instead construct an entirely new scanner for this block
+ */
+ public boolean areBuffersBigEnough() {
+ if (rowNodes.length < blockMeta.getRowTreeDepth()) {
+ return false;
+ }
+ if (rowBuffer.length < blockMeta.getMaxRowLength()) {
+ return false;
+ }
+ if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
+ return false;
+ }
+ return true;
+ }
+
+ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block,
+ boolean includeMvccVersion) {
+ this.block = block;
+ this.blockMeta = blockMeta;
+ this.familyOffset = familyBuffer.length;
+ this.familyReader.initOnBlock(blockMeta, block);
+ this.qualifierOffset = qualifierBuffer.length;
+ this.qualifierReader.initOnBlock(blockMeta, block);
+ this.timestampDecoder.initOnBlock(blockMeta, block);
+ this.mvccVersionDecoder.initOnBlock(blockMeta, block);
+ this.includeMvccVersion = includeMvccVersion;
+ resetToBeforeFirstEntry();
+ }
+
+ // Does this have to be in the CellScanner Interface? TODO
+ public void resetToBeforeFirstEntry() {
+ beforeFirst = true;
+ afterLast = false;
+ rowNodeStackIndex = -1;
+ currentRowNode = null;
+ rowLength = 0;
+ familyOffset = familyBuffer.length;
+ familyLength = 0;
+ qualifierOffset = blockMeta.getMaxQualifierLength();
+ qualifierLength = 0;
+ nubCellsRemain = false;
+ currentCellIndex = -1;
+ timestamp = -1L;
+ type = DEFAULT_TYPE;
+ absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized
+ valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length
+ }
+
+ /**
+ * Call this before putting the scanner back into a pool so it doesn't hold the last used block
+ * in memory.
+ */
+ public void releaseBlockReference(){
+ block = null;
+ }
+
+
+ /********************** CellScanner **********************/
+
+ @Override
+ public Cell current() {
+ if(isOutOfBounds()){
+ return null;
+ }
+ return (Cell)this;
+ }
+
+ /******************* Object methods ************************/
+
+ @Override
+ public boolean equals(Object obj) {
+ //trivial override to confirm intent (findbugs)
+ return super.equals(obj);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode();
+ }
+
+ /**
+ * Override PrefixTreeCell.toString() with a check to see if the current cell is valid.
+ */
+ @Override
+ public String toString() {
+ Cell currentCell = current();
+ if(currentCell==null){
+ return "null";
+ }
+ return ((PrefixTreeCell)currentCell).getKeyValueString();
+ }
+
+
+ /******************* advance ***************************/
+
+ public boolean positionAtFirstCell() {
+ reInitFirstNode();
+ return advance();
+ }
+
+ @Override
+ public boolean advance() {
+ if (afterLast) {
+ return false;
+ }
+ if (!hasOccurrences()) {
+ resetToBeforeFirstEntry();
+ }
+ if (beforeFirst || isLastCellInRow()) {
+ nextRow();
+ if (afterLast) {
+ return false;
+ }
+ } else {
+ ++currentCellIndex;
+ }
+
+ populateNonRowFields(currentCellIndex);
+ return true;
+ }
+
+
+ public boolean nextRow() {
+ nextRowInternal();
+ if (afterLast) {
+ return false;
+ }
+ populateNonRowFields(currentCellIndex);
+ return true;
+ }
+
+
+ /**
+ * This method is safe to call when the scanner is not on a fully valid row node, as in the case
+ * of a row token miss in the Searcher.
+ * @return true if we are positioned on a valid row, false if past the end of the block
+ */
+ protected boolean nextRowInternal() {
+ if (afterLast) {
+ return false;
+ }
+ if (beforeFirst) {
+ initFirstNode();
+ if (currentRowNode.hasOccurrences()) {
+ if (currentRowNode.isNub()) {
+ nubCellsRemain = true;
+ }
+ currentCellIndex = 0;
+ return true;
+ }
+ }
+ if (currentRowNode.isLeaf()) {
+ discardCurrentRowNode(true);
+ }
+ while (!afterLast) {
+ if (nubCellsRemain) {
+ nubCellsRemain = false;
+ }
+ if (currentRowNode.hasMoreFanNodes()) {
+ followNextFan();
+ if (currentRowNode.hasOccurrences()) {
+ currentCellIndex = 0;
+ return true;
+ }// found some values
+ } else {
+ discardCurrentRowNode(true);
+ }
+ }
+ return false;// went past the end
+ }
+
+
+ /**************** secondary traversal methods ******************************/
+
+ protected void reInitFirstNode() {
+ resetToBeforeFirstEntry();
+ initFirstNode();
+ }
+
+ protected void initFirstNode() {
+ int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset();
+ rowNodeStackIndex = 0;
+ currentRowNode = rowNodes[0];
+ currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure);
+ appendCurrentTokenToRowBuffer();
+ beforeFirst = false;
+ }
+
+ protected void followFirstFan() {
+ followFan(0);
+ }
+
+ protected void followPreviousFan() {
+ int nextFanPosition = currentRowNode.getFanIndex() - 1;
+ followFan(nextFanPosition);
+ }
+
+ protected void followCurrentFan() {
+ int currentFanPosition = currentRowNode.getFanIndex();
+ followFan(currentFanPosition);
+ }
+
+ protected void followNextFan() {
+ int nextFanPosition = currentRowNode.getFanIndex() + 1;
+ followFan(nextFanPosition);
+ }
+
+ protected void followLastFan() {
+ followFan(currentRowNode.getLastFanIndex());
+ }
+
+ protected void followFan(int fanIndex) {
+ currentRowNode.setFanIndex(fanIndex);
+ appendToRowBuffer(currentRowNode.getFanByte(fanIndex));
+
+ int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset()
+ + currentRowNode.getNextNodeOffset(fanIndex, blockMeta);
+ ++rowNodeStackIndex;
+
+ currentRowNode = rowNodes[rowNodeStackIndex];
+ currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure);
+
+ //TODO getToken is spewing garbage
+ appendCurrentTokenToRowBuffer();
+ if (currentRowNode.isNub()) {
+ nubCellsRemain = true;
+ }
+ currentCellIndex = 0;
+ }
+
+ /**
+ * @param forwards which marker to set if we overflow
+ */
+ protected void discardCurrentRowNode(boolean forwards) {
+ RowNodeReader rowNodeBeingPopped = currentRowNode;
+ --rowNodeStackIndex;// pop it off the stack
+ if (rowNodeStackIndex < 0) {
+ currentRowNode = null;
+ if (forwards) {
+ markAfterLast();
+ } else {
+ markBeforeFirst();
+ }
+ return;
+ }
+ popFromRowBuffer(rowNodeBeingPopped);
+ currentRowNode = rowNodes[rowNodeStackIndex];
+ }
+
+ protected void markBeforeFirst() {
+ beforeFirst = true;
+ afterLast = false;
+ currentRowNode = null;
+ }
+
+ protected void markAfterLast() {
+ beforeFirst = false;
+ afterLast = true;
+ currentRowNode = null;
+ }
+
+
+ /***************** helper methods **************************/
+
+ protected void appendCurrentTokenToRowBuffer() {
+ System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength,
+ currentRowNode.getTokenLength());
+ rowLength += currentRowNode.getTokenLength();
+ }
+
+ protected void appendToRowBuffer(byte b) {
+ rowBuffer[rowLength] = b;
+ ++rowLength;
+ }
+
+ protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) {
+ rowLength -= rowNodeBeingPopped.getTokenLength();
+ --rowLength; // pop the parent's fan byte
+ }
+
+ protected boolean hasOccurrences() {
+ return currentRowNode != null && currentRowNode.hasOccurrences();
+ }
+
+ protected boolean isBranch() {
+ return currentRowNode != null && !currentRowNode.hasOccurrences()
+ && currentRowNode.hasChildren();
+ }
+
+ protected boolean isNub() {
+ return currentRowNode != null && currentRowNode.hasOccurrences()
+ && currentRowNode.hasChildren();
+ }
+
+ protected boolean isLeaf() {
+ return currentRowNode != null && currentRowNode.hasOccurrences()
+ && !currentRowNode.hasChildren();
+ }
+
+ //TODO expose this in a PrefixTreeScanner interface
+ public boolean isBeforeFirst(){
+ return beforeFirst;
+ }
+
+ public boolean isAfterLast(){
+ return afterLast;
+ }
+
+ protected boolean isOutOfBounds(){
+ return beforeFirst || afterLast;
+ }
+
+ protected boolean isFirstCellInRow() {
+ return currentCellIndex == 0;
+ }
+
+ protected boolean isLastCellInRow() {
+ return currentCellIndex == currentRowNode.getLastCellIndex();
+ }
+
+
+ /********************* fill in family/qualifier/ts/type/value ************/
+
+ protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) {
+ populateNonRowFields(cellNum);
+ return CellComparator.compareStatic(this, key);
+ }
+
+ protected void populateFirstNonRowFields() {
+ populateNonRowFields(0);
+ }
+
+ protected void populatePreviousNonRowFields() {
+ populateNonRowFields(currentCellIndex - 1);
+ }
+
+ protected void populateLastNonRowFields() {
+ populateNonRowFields(currentRowNode.getLastCellIndex());
+ }
+
+ protected void populateNonRowFields(int cellIndex) {
+ currentCellIndex = cellIndex;
+ populateFamily();
+ populateQualifier();
+ populateTimestamp();
+ populateMvccVersion();
+ populateType();
+ populateValueOffsets();
+ }
+
+ protected void populateFamily() {
+ int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta);
+ familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset();
+ familyLength = familyReader.getColumnLength();
+ }
+
+ protected void populateQualifier() {
+ int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta);
+ qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset();
+ qualifierLength = qualifierReader.getColumnLength();
+ }
+
+ protected void populateTimestamp() {
+ if (blockMeta.isAllSameTimestamp()) {
+ timestamp = blockMeta.getMinTimestamp();
+ } else {
+ int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta);
+ timestamp = timestampDecoder.getLong(timestampIndex);
+ }
+ }
+
+ protected void populateMvccVersion() {
+ if (blockMeta.isAllSameMvccVersion()) {
+ mvccVersion = blockMeta.getMinMvccVersion();
+ } else {
+ int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex,
+ blockMeta);
+ mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex);
+ }
+ }
+
+ protected void populateType() {
+ int typeInt;
+ if (blockMeta.isAllSameType()) {
+ typeInt = blockMeta.getAllTypes();
+ } else {
+ typeInt = currentRowNode.getType(currentCellIndex, blockMeta);
+ }
+ type = PrefixTreeCell.TYPES[typeInt];
+ }
+
+ protected void populateValueOffsets() {
+ int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta);
+ absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection;
+ valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
+ }
+
+
+ /**************** getters ***************************/
+
+ public byte[] getTreeBytes() {
+ return block;
+ }
+
+ public PrefixTreeBlockMeta getBlockMeta() {
+ return blockMeta;
+ }
+
+ public int getMaxRowTreeStackNodes() {
+ return rowNodes.length;
+ }
+
+ public int getRowBufferLength() {
+ return rowBuffer.length;
+ }
+
+ public int getQualifierBufferLength() {
+ return qualifierBuffer.length;
+ }
+
+}
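As a rough usage sketch (illustrative only, not part of this patch): a block decoder would size a scanner from the block's meta, initialize it on the block bytes, and iterate. The method name scanBlock and its parameters are assumptions for illustration; a real caller would typically borrow the scanner from a pool instead of constructing it per block.

  static void scanBlock(PrefixTreeBlockMeta blockMeta, byte[] block, boolean includeMvccVersion) {
    // size the reusable buffers from the block meta so areBuffersBigEnough() would pass
    PrefixTreeArrayScanner scanner = new PrefixTreeArrayScanner(blockMeta,
        blockMeta.getRowTreeDepth(), blockMeta.getMaxRowLength(),
        blockMeta.getMaxQualifierLength());
    scanner.initOnBlock(blockMeta, block, includeMvccVersion);
    while (scanner.advance()) {
      Cell cell = scanner.current();// flyweight: contents are overwritten by the next advance()
      // ... consume cell ...
    }
    scanner.releaseBlockReference();// drop the block before handing the scanner back to a pool
  }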
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java
new file mode 100644
index 0000000..097a997
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java
@@ -0,0 +1,405 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
+import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
+
+import com.google.common.primitives.UnsignedBytes;
+
+/**
+ * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
+ * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
+ * set up to be a Trie of rows, so finding a particular row is extremely cheap.
+ *
+ * Once it finds the row, it does a binary search through the cells inside the row, which is not
+ * as fast as the trie search, but faster than iterating through every cell like existing block
+ * formats do. For this reason, this implementation is targeted towards schemas where rows are
+ * narrow enough to have several or many per block, and where you are generally looking for the
+ * entire row or the first cell. It will still be fast for wide rows or point queries, but could
+ * be improved upon.
+ */
+@InterfaceAudience.Private
+public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
+ CellSearcher {
+
+ /*************** construct ******************************/
+
+ public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
+ int rowBufferLength, int qualifierBufferLength) {
+ super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
+ }
+
+
+ /********************* CellSearcher methods *******************/
+
+ @Override
+ public boolean positionAt(Cell key) {
+ return CellScannerPosition.AT == positionAtOrAfter(key);
+ }
+
+ @Override
+ public CellScannerPosition positionAtOrBefore(Cell key) {
+ reInitFirstNode();
+ int fanIndex = -1;
+
+ while(true){
+ //detect row mismatch. break loop if mismatch
+ int currentNodeDepth = rowLength;
+ int rowTokenComparison = compareToCurrentToken(key);
+ if(rowTokenComparison != 0){
+ return fixRowTokenMissReverse(rowTokenComparison);
+ }
+
+ //exact row found, move on to qualifier & ts
+ if(rowMatchesAfterCurrentPosition(key)){
+ return positionAtQualifierTimestamp(key, true);
+ }
+
+ //detect dead end (no fan to descend into)
+ if(!currentRowNode.hasFan()){
+ if(hasOccurrences()){//must be leaf or nub
+ populateLastNonRowFields();
+ return CellScannerPosition.BEFORE;
+ }else{
+ //TODO i don't think this case is exercised by any tests
+ return fixRowFanMissReverse(0);
+ }
+ }
+
+ //keep hunting for the rest of the row
+ byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
+ fanIndex = currentRowNode.whichFanNode(searchForByte);
+ if(fanIndex < 0){//no matching row. return early
+ int insertionPoint = -fanIndex;
+ return fixRowFanMissReverse(insertionPoint);
+ }
+ //found a match, so dig deeper into the tree
+ followFan(fanIndex);
+ }
+ }
+
+ /**
+ * Identical workflow to positionAtOrBefore, but kept as a separate method to avoid ~10 extra
+ * if-statements. Priority on readability and debuggability.
+ */
+ @Override
+ public CellScannerPosition positionAtOrAfter(Cell key) {
+ reInitFirstNode();
+ int fanIndex = -1;
+
+ while(true){
+ //detect row mismatch. break loop if mismatch
+ int currentNodeDepth = rowLength;
+ int rowTokenComparison = compareToCurrentToken(key);
+ if(rowTokenComparison != 0){
+ return fixRowTokenMissForward(rowTokenComparison);
+ }
+
+ //exact row found, move on to qualifier & ts
+ if(rowMatchesAfterCurrentPosition(key)){
+ return positionAtQualifierTimestamp(key, false);
+ }
+
+ //detect dead end (no fan to descend into)
+ if(!currentRowNode.hasFan()){
+ if(hasOccurrences()){
+ populateFirstNonRowFields();
+ return CellScannerPosition.AFTER;
+ }else{
+ //TODO i don't think this case is exercised by any tests
+ return fixRowFanMissForward(0);
+ }
+ }
+
+ //keep hunting for the rest of the row
+ byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
+ fanIndex = currentRowNode.whichFanNode(searchForByte);
+ if(fanIndex < 0){//no matching row. return early
+ int insertionPoint = -fanIndex;
+ return fixRowFanMissForward(insertionPoint);
+ }
+ //found a match, so dig deeper into the tree
+ followFan(fanIndex);
+ }
+ }
+
+ @Override
+ public boolean seekForwardTo(Cell key) {
+ if(currentPositionIsAfter(key)){
+ //our position is after the requested key, so can't do anything
+ return false;
+ }
+ return positionAt(key);
+ }
+
+ @Override
+ public CellScannerPosition seekForwardToOrBefore(Cell key) {
+ //Do we even need this check, or should upper layers avoid this situation? It's relatively
+ //expensive compared to the rest of the seek operation.
+ if(currentPositionIsAfter(key)){
+ //our position is after the requested key, so can't do anything
+ return CellScannerPosition.AFTER;
+ }
+
+ return positionAtOrBefore(key);
+ }
+
+ @Override
+ public CellScannerPosition seekForwardToOrAfter(Cell key) {
+ //Do we even need this check, or should upper layers avoid this situation? It's relatively
+ //expensive compared to the rest of the seek operation.
+ if(currentPositionIsAfter(key)){
+ //our position is after the requested key, so can't do anything
+ return CellScannerPosition.AFTER;
+ }
+
+ return positionAtOrAfter(key);
+ }
+
+ /**
+ * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
+ */
+ @Override
+ public void positionAfterLastCell() {
+ resetToBeforeFirstEntry();
+ beforeFirst = false;
+ afterLast = true;
+ }
+
+
+ /***************** Object methods ***************************/
+
+ @Override
+ public boolean equals(Object obj) {
+ //trivial override to confirm intent (findbugs)
+ return super.equals(obj);
+ }
+
+
+ /****************** internal methods ************************/
+
+ protected boolean currentPositionIsAfter(Cell cell){
+ return compareTo(cell) > 0;
+ }
+
+ protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
+ int minIndex = 0;
+ int maxIndex = currentRowNode.getLastCellIndex();
+ int diff;
+ while (true) {
+ int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
+ diff = populateNonRowFieldsAndCompareTo(midIndex, key);
+
+ if (diff == 0) {// found exact match
+ return CellScannerPosition.AT;
+ } else if (minIndex == maxIndex) {// even termination case
+ break;
+ } else if ((minIndex + 1) == maxIndex) {// odd termination case
+ diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
+ if(diff > 0){
+ diff = populateNonRowFieldsAndCompareTo(minIndex, key);
+ }
+ break;
+ } else if (diff < 0) {// keep going forward
+ minIndex = currentCellIndex;
+ } else {// went past it, back up
+ maxIndex = currentCellIndex;
+ }
+ }
+
+ if (diff == 0) {
+ return CellScannerPosition.AT;
+
+ } else if (diff < 0) {// we are before key
+ if (beforeOnMiss) {
+ return CellScannerPosition.BEFORE;
+ }
+ if (advance()) {
+ return CellScannerPosition.AFTER;
+ }
+ return CellScannerPosition.AFTER_LAST;
+
+ } else {// we are after key
+ if (!beforeOnMiss) {
+ return CellScannerPosition.AFTER;
+ }
+ if (previous()) {
+ return CellScannerPosition.BEFORE;
+ }
+ return CellScannerPosition.BEFORE_FIRST;
+ }
+ }
+
+ /**
+ * compare this.row to key.row but starting at the current rowLength
+ * @param key Cell being searched for
+ * @return true if row buffer contents match key.row
+ */
+ protected boolean rowMatchesAfterCurrentPosition(Cell key) {
+ if (!currentRowNode.hasOccurrences()) {
+ return false;
+ }
+ int thatRowLength = key.getRowLength();
+ if (rowLength != thatRowLength) {
+ return false;
+ }
+ return true;
+ }
+
+ // TODO move part of this to Cell comparator?
+ /**
+ * Compare only the bytes within the window of the current token
+ * @param key the Cell being searched for
+ * @return -1 if key is less than (before) this, 0 if equal, and 1 if key is after
+ */
+ protected int compareToCurrentToken(Cell key) {
+ int startIndex = rowLength - currentRowNode.getTokenLength();
+ int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
+ for (int i = startIndex; i < endIndexExclusive; ++i) {
+ if (i >= key.getRowLength()) {// key was shorter, so it's first
+ return -1;
+ }
+ byte keyByte = CellUtil.getRowByte(key, i);
+ byte thisByte = rowBuffer[i];
+ if (keyByte == thisByte) {
+ continue;
+ }
+ return UnsignedBytes.compare(keyByte, thisByte);
+ }
+ return 0;
+ }
+
+ protected void followLastFansUntilExhausted(){
+ while(currentRowNode.hasFan()){
+ followLastFan();
+ }
+ }
+
+
+ /****************** complete seek when token mismatch ******************/
+
+ /**
+ * @param searcherIsAfterInputKey <0: input key is before the searcher's position
+ * >0: input key is after the searcher's position
+ */
+ protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
+ if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
+ boolean foundPreviousRow = previousRow(true);
+ if(foundPreviousRow){
+ populateLastNonRowFields();
+ return CellScannerPosition.BEFORE;
+ }else{
+ return CellScannerPosition.BEFORE_FIRST;
+ }
+
+ }else{//searcher position is before the input key
+ if(currentRowNode.hasOccurrences()){
+ populateFirstNonRowFields();
+ return CellScannerPosition.BEFORE;
+ }
+ boolean foundNextRow = nextRow();
+ if(foundNextRow){
+ return CellScannerPosition.AFTER;
+ }else{
+ return CellScannerPosition.AFTER_LAST;
+ }
+ }
+ }
+
+ /**
+ * @param searcherIsAfterInputKey <0: input key is before the searcher's position
+ * >0: input key is after the searcher's position
+ */
+ protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
+ if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
+ if(currentRowNode.hasOccurrences()){
+ populateFirstNonRowFields();
+ return CellScannerPosition.AFTER;
+ }
+ boolean foundNextRow = nextRow();
+ if(foundNextRow){
+ return CellScannerPosition.AFTER;
+ }else{
+ return CellScannerPosition.AFTER_LAST;
+ }
+
+ }else{//searcher position is before the input key, so go forward
+ discardCurrentRowNode(true);
+ boolean foundNextRow = nextRow();
+ if(foundNextRow){
+ return CellScannerPosition.AFTER;
+ }else{
+ return CellScannerPosition.AFTER_LAST;
+ }
+ }
+ }
+
+
+ /****************** complete seek when fan mismatch ******************/
+
+ protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
+ if(fanInsertionPoint == 0){//we need to back up a row
+ boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
+ if(foundPreviousRow){
+ populateLastNonRowFields();
+ return CellScannerPosition.BEFORE;
+ }
+ return CellScannerPosition.BEFORE_FIRST;
+ }
+
+ //follow the previous fan, but then descend recursively forward
+ followFan(fanInsertionPoint - 1);
+ followLastFansUntilExhausted();
+ populateLastNonRowFields();
+ return CellScannerPosition.BEFORE;
+ }
+
+ protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
+ if(fanInsertionPoint >= currentRowNode.getFanOut()){
+ discardCurrentRowNode(true);
+ if (!nextRow()) {
+ return CellScannerPosition.AFTER_LAST;
+ } else {
+ return CellScannerPosition.AFTER;
+ }
+ }
+
+ followFan(fanInsertionPoint);
+ if(hasOccurrences()){
+ populateFirstNonRowFields();
+ return CellScannerPosition.AFTER;
+ }
+
+ if(nextRowInternal()){
+ populateFirstNonRowFields();
+ return CellScannerPosition.AFTER;
+
+ }else{
+ return CellScannerPosition.AFTER_LAST;
+ }
+ }
+
+}
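A hedged sketch of the seek API above in use (the searcher instance and seekKey are assumed to be supplied by the caller): positionAtOrAfter() answers AT when the exact cell exists, AFTER when it lands on the next cell in the block, and AFTER_LAST when every cell sorts before the key.

  static Cell seekInBlock(PrefixTreeArraySearcher searcher, Cell seekKey) {
    CellScannerPosition position = searcher.positionAtOrAfter(seekKey);
    if (position == CellScannerPosition.AT || position == CellScannerPosition.AFTER) {
      return searcher.current();// first cell >= seekKey within this block
    }
    return null;// AFTER_LAST: every cell in the block sorts before seekKey
  }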
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java
new file mode 100644
index 0000000..b4ce25f
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueUtil;
+
+/**
+ * As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields
+ * of this class so that Cell logic can be applied, but without allocating new memory for every Cell
+ * iterated through.
+ */
+@InterfaceAudience.Private
+public class PrefixTreeCell implements Cell, Comparable {
+
+ /********************** static **********************/
+
+ public static final KeyValue.Type[] TYPES = new KeyValue.Type[256];
+ static {
+ for (KeyValue.Type type : KeyValue.Type.values()) {
+ TYPES[type.getCode() & 0xff] = type;
+ }
+ }
+
+ //Same as KeyValue constructor. Only used to avoid NPEs when the full cell hasn't been initialized.
+ public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put;
+
+ /******************** fields ************************/
+
+ protected byte[] block;
+ //we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler
+ protected boolean includeMvccVersion;
+
+ protected byte[] rowBuffer;
+ protected int rowLength;
+
+ protected byte[] familyBuffer;
+ protected int familyOffset;
+ protected int familyLength;
+
+ protected byte[] qualifierBuffer;// aligned to the end of the array
+ protected int qualifierOffset;
+ protected int qualifierLength;
+
+ protected Long timestamp;
+ protected Long mvccVersion;
+
+ protected KeyValue.Type type;
+
+ protected int absoluteValueOffset;
+ protected int valueLength;
+
+
+ /********************** Cell methods ******************/
+
+ /**
+ * For debugging. Currently creates new KeyValue to utilize its toString() method.
+ */
+ @Override
+ public String toString() {
+ return getKeyValueString();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Cell)) {
+ return false;
+ }
+ //Temporary hack to maintain backwards compatibility with KeyValue.equals
+ return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj);
+
+ //TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907
+ }
+
+ @Override
+ public int hashCode(){
+ //Temporary hack to maintain backwards compatibility with KeyValue.hashCode
+ //I don't think this is used in any hot code paths
+ return KeyValueUtil.copyToNewKeyValue(this).hashCode();
+
+ //TODO return CellComparator.hashCode(this);//see HBASE-6907
+ }
+
+ @Override
+ public int compareTo(Cell other) {
+ return CellComparator.compareStatic(this, other);
+ }
+
+ @Override
+ public long getTimestamp() {
+ return timestamp;
+ }
+
+ @Override
+ public long getMvccVersion() {
+ if (!includeMvccVersion) {
+ return 0L;
+ }
+ return mvccVersion;
+ }
+
+ @Override
+ public int getValueLength() {
+ return valueLength;
+ }
+
+ @Override
+ public byte[] getRowArray() {
+ return rowBuffer;
+ }
+
+ @Override
+ public int getRowOffset() {
+ return 0;
+ }
+
+ @Override
+ public short getRowLength() {
+ return (short) rowLength;
+ }
+
+ @Override
+ public byte[] getFamilyArray() {
+ return familyBuffer;
+ }
+
+ @Override
+ public int getFamilyOffset() {
+ return familyOffset;
+ }
+
+ @Override
+ public byte getFamilyLength() {
+ return (byte) familyLength;
+ }
+
+ @Override
+ public byte[] getQualifierArray() {
+ return qualifierBuffer;
+ }
+
+ @Override
+ public int getQualifierOffset() {
+ return qualifierOffset;
+ }
+
+ @Override
+ public int getQualifierLength() {
+ return qualifierLength;
+ }
+
+ @Override
+ public byte[] getValueArray() {
+ return block;
+ }
+
+ @Override
+ public int getValueOffset() {
+ return absoluteValueOffset;
+ }
+
+ @Override
+ public byte getTypeByte() {
+ return type.getCode();
+ }
+
+
+ /************************* helper methods *************************/
+
+ /**
+ * Need this separate method so we can call it from subclasses' toString() methods
+ */
+ protected String getKeyValueString(){
+ KeyValue kv = KeyValueUtil.copyToNewKeyValue(this);
+ return kv.toString();
+ }
+
+}
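Because the scanner and searcher reuse a single PrefixTreeCell as a flyweight, a caller that needs to hold a cell beyond the next advance() should copy it out first. A minimal sketch, assuming `scanner` is a positioned PrefixTreeArrayScanner:

  static KeyValue copyCurrentCell(PrefixTreeArrayScanner scanner) {
    Cell current = scanner.current();
    return KeyValueUtil.copyToNewKeyValue(current);// deep copy, unaffected by later advance() calls
  }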
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java
new file mode 100644
index 0000000..e9cf05d
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode.column;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+import org.apache.hadoop.hbase.util.vint.UVIntTool;
+
+@InterfaceAudience.Private
+public class ColumnNodeReader {
+
+ /**************** fields ************************/
+
+ protected PrefixTreeBlockMeta blockMeta;
+ protected byte[] block;
+
+ protected byte[] columnBuffer;
+ protected boolean familyVsQualifier;
+
+ protected int offsetIntoBlock;
+
+ protected int tokenOffsetIntoBlock;
+ protected int tokenLength;
+ protected int parentStartPosition;
+
+
+ /************** construct *************************/
+
+ public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) {
+ this.columnBuffer = columnBuffer;
+ this.familyVsQualifier = familyVsQualifier;
+ }
+
+ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
+ this.blockMeta = blockMeta;
+ this.block = block;
+ }
+
+
+ /************* methods *****************************/
+
+ public void positionAt(int offsetIntoBlock) {
+ this.offsetIntoBlock = offsetIntoBlock;
+ tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
+ tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
+ int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength;
+ int offsetWidth;
+ if (familyVsQualifier) {
+ offsetWidth = blockMeta.getFamilyOffsetWidth();
+ } else {
+ offsetWidth = blockMeta.getQualifierOffsetWidth();
+ }
+ parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth);
+ }
+
+ public void prependTokenToBuffer(int bufferStartIndex) {
+ System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength);
+ }
+
+ public boolean isRoot() {
+ if (familyVsQualifier) {
+ return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset();
+ } else {
+ return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset();
+ }
+ }
+
+
+ /************** standard methods *********************/
+
+ @Override
+ public String toString() {
+ return super.toString() + "[" + offsetIntoBlock + "]";
+ }
+
+
+ /****************** get/set ****************************/
+
+ public int getTokenLength() {
+ return tokenLength;
+ }
+
+ public int getParentStartPosition() {
+ return parentStartPosition;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java
new file mode 100644
index 0000000..2b04a4b
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode.column;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+
+/**
+ * Position one of these appropriately in the data block and you can call its methods to retrieve
+ * the family or qualifier at the current position.
+ */
+@InterfaceAudience.Private
+public class ColumnReader {
+
+ /****************** fields *************************/
+
+ protected PrefixTreeBlockMeta blockMeta;
+
+ protected byte[] columnBuffer;
+ protected int columnOffset;
+ protected int columnLength;
+ protected boolean familyVsQualifier;
+
+ protected ColumnNodeReader columnNodeReader;
+
+
+ /******************** construct *******************/
+
+ public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) {
+ this.columnBuffer = columnBuffer;
+ this.familyVsQualifier = familyVsQualifier;
+ this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier);
+ }
+
+ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
+ this.blockMeta = blockMeta;
+ clearColumnBuffer();
+ columnNodeReader.initOnBlock(blockMeta, block);
+ }
+
+
+ /********************* methods *******************/
+
+ public ColumnReader populateBuffer(int offsetIntoColumnData) {
+ clearColumnBuffer();
+ int nextRelativeOffset = offsetIntoColumnData;
+ while (true) {
+ int absoluteOffset;
+ if (familyVsQualifier) {
+ absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset;
+ } else {
+ absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset;
+ }
+ columnNodeReader.positionAt(absoluteOffset);
+ columnOffset -= columnNodeReader.getTokenLength();
+ columnLength += columnNodeReader.getTokenLength();
+ columnNodeReader.prependTokenToBuffer(columnOffset);
+ if (columnNodeReader.isRoot()) {
+ return this;
+ }
+ nextRelativeOffset = columnNodeReader.getParentStartPosition();
+ }
+ }
+
+ public byte[] copyBufferToNewArray() {// for testing
+ byte[] out = new byte[columnLength];
+ System.arraycopy(columnBuffer, columnOffset, out, 0, out.length);
+ return out;
+ }
+
+ public int getColumnLength() {
+ return columnLength;
+ }
+
+ public void clearColumnBuffer() {
+ columnOffset = columnBuffer.length;
+ columnLength = 0;
+ }
+
+
+ /****************************** get/set *************************************/
+
+ public int getColumnOffset() {
+ return columnOffset;
+ }
+
+}
+
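A sketch of how the scanner drives this reader (the buffer sizing, helper name, and offset parameter are assumptions): starting from the column offset stored in the row node, populateBuffer() walks parent pointers back to the root, prepending each token, so the assembled column ends up in the tail of the shared buffer.

  static byte[] readQualifier(PrefixTreeBlockMeta blockMeta, byte[] block,
      int offsetIntoQualifierSection) {
    byte[] qualifierBuffer = new byte[blockMeta.getMaxQualifierLength()];
    ColumnReader qualifierReader = new ColumnReader(qualifierBuffer, false);// false -> qualifier tree
    qualifierReader.initOnBlock(blockMeta, block);
    qualifierReader.populateBuffer(offsetIntoQualifierSection);// offset taken from the row node
    // the assembled qualifier now occupies [getColumnOffset(), qualifierBuffer.length)
    return qualifierReader.copyBufferToNewArray();
  }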
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java
new file mode 100644
index 0000000..ffe1e1a
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode.row;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+import org.apache.hadoop.hbase.util.vint.UVIntTool;
+
+/**
+ * Position one of these appropriately in the data block and you can call its methods to retrieve
+ * information necessary to decode the cells in the row.
+ */
+@InterfaceAudience.Private
+public class RowNodeReader {
+
+ /************* fields ***********************************/
+
+ protected byte[] block;
+ protected int offset;
+ protected int fanIndex;
+
+ protected int numCells;
+
+ protected int tokenOffset;
+ protected int tokenLength;
+ protected int fanOffset;
+ protected int fanOut;
+
+ protected int familyOffsetsOffset;
+ protected int qualifierOffsetsOffset;
+ protected int timestampIndexesOffset;
+ protected int mvccVersionIndexesOffset;
+ protected int operationTypesOffset;
+ protected int valueOffsetsOffset;
+ protected int valueLengthsOffset;
+ protected int nextNodeOffsetsOffset;
+
+
+ /******************* construct **************************/
+
+ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) {
+ this.block = block;
+
+ this.offset = offset;
+ resetFanIndex();
+
+ this.tokenLength = UVIntTool.getInt(block, offset);
+ this.tokenOffset = offset + UVIntTool.numBytes(tokenLength);
+
+ this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength);
+ this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut);
+
+ this.numCells = UVIntTool.getInt(block, fanOffset + fanOut);
+
+ this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
+ this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
+ this.timestampIndexesOffset = qualifierOffsetsOffset + numCells
+ * blockMeta.getQualifierOffsetWidth();
+ this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells
+ * blockMeta.getTimestampIndexWidth();
+ this.operationTypesOffset = mvccVersionIndexesOffset + numCells
+ * blockMeta.getMvccVersionIndexWidth();
+ this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth();
+ this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth();
+ this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth();
+ }
+
+
+ /******************** methods ****************************/
+
+ public boolean isLeaf() {
+ return fanOut == 0;
+ }
+
+ public boolean isNub() {
+ return fanOut > 0 && numCells > 0;
+ }
+
+ public boolean isBranch() {
+ return fanOut > 0 && numCells == 0;
+ }
+
+ public boolean hasOccurrences() {
+ return numCells > 0;
+ }
+
+ public int getTokenArrayOffset(){
+ return tokenOffset;
+ }
+
+ public int getTokenLength() {
+ return tokenLength;
+ }
+
+ public byte getFanByte(int i) {
+ return block[fanOffset + i];
+ }
+
+ /**
+ * for debugging
+ */
+ protected String getFanByteReadable(int i){
+ return Bytes.toStringBinary(block, fanOffset + i, 1);
+ }
+
+ public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getFamilyOffsetWidth();
+ int startIndex = familyOffsetsOffset + fIntWidth * index;
+ return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ }
+
+ public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getQualifierOffsetWidth();
+ int startIndex = qualifierOffsetsOffset + fIntWidth * index;
+ return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ }
+
+ public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getTimestampIndexWidth();
+ int startIndex = timestampIndexesOffset + fIntWidth * index;
+ return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ }
+
+ public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getMvccVersionIndexWidth();
+ int startIndex = mvccVersionIndexesOffset + fIntWidth * index;
+ return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ }
+
+ public int getType(int index, PrefixTreeBlockMeta blockMeta) {
+ if (blockMeta.isAllSameType()) {
+ return blockMeta.getAllTypes();
+ }
+ return block[operationTypesOffset + index];
+ }
+
+ public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getValueOffsetWidth();
+ int startIndex = valueOffsetsOffset + fIntWidth * index;
+ int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ return offset;
+ }
+
+ public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getValueLengthWidth();
+ int startIndex = valueLengthsOffset + fIntWidth * index;
+ int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ return length;
+ }
+
+ public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) {
+ int fIntWidth = blockMeta.getNextNodeOffsetWidth();
+ int startIndex = nextNodeOffsetsOffset + fIntWidth * index;
+ return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
+ }
+
+ public String getBranchNubLeafIndicator() {
+ if (isNub()) {
+ return "N";
+ }
+ return isBranch() ? "B" : "L";
+ }
+
+ public boolean hasChildren() {
+ return fanOut > 0;
+ }
+
+ public int getLastFanIndex() {
+ return fanOut - 1;
+ }
+
+ public int getLastCellIndex() {
+ return numCells - 1;
+ }
+
+ public int getNumCells() {
+ return numCells;
+ }
+
+ public int getFanOut() {
+ return fanOut;
+ }
+
+ public byte[] getToken() {
+ // TODO pass in reusable ByteRange
+ return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray();
+ }
+
+ public int getOffset() {
+ return offset;
+ }
+
+ public int whichFanNode(byte searchForByte) {
+ if( ! hasFan()){
+ throw new IllegalStateException("This row node has no fan, so can't search it");
+ }
+ int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut,
+ searchForByte);
+ if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block
+ return fanIndexInBlock - fanOffset;
+ }
+ return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse
+ }
+
+ public void resetFanIndex() {
+ fanIndex = -1;// just the way the logic currently works
+ }
+
+ public int getFanIndex() {
+ return fanIndex;
+ }
+
+ public void setFanIndex(int fanIndex) {
+ this.fanIndex = fanIndex;
+ }
+
+ public boolean hasFan(){
+ return fanOut > 0;
+ }
+
+ public boolean hasPreviousFanNodes() {
+ return fanOut > 0 && fanIndex > 0;
+ }
+
+ public boolean hasMoreFanNodes() {
+ return fanIndex < getLastFanIndex();
+ }
+
+ public boolean isOnLastFanNode() {
+ return !hasMoreFanNodes();
+ }
+
+
+ /*************** standard methods **************************/
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut));
+ sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength));
+ sb.append(",numCells:" + numCells);
+ sb.append(",fanIndex:"+fanIndex);
+ if(fanIndex>=0){
+ sb.append("("+getFanByteReadable(fanIndex)+")");
+ }
+ return sb.toString();
+ }
+}
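The return convention of whichFanNode() above is worth spelling out: a non-negative result is the fan index of the matching byte, while a negative result encodes the point where the byte would have to be inserted (the searcher recovers it as -result). A small sketch, with the helper name, `node`, and `searchByte` assumed:

  static void descendOrHandleMiss(RowNodeReader node, byte searchByte) {
    int fanIndex = node.whichFanNode(searchByte);
    if (fanIndex >= 0) {
      // matching fan byte found; descend into the child at fanIndex
    } else {
      int insertionPoint = -fanIndex;// position in the fan where searchByte would belong
      // row miss: fall back to a neighboring fan, as the searcher's fixRowFanMiss* methods do
    }
  }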
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java
new file mode 100644
index 0000000..4a53510
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+
+/**
+ * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block.
+ */
+@InterfaceAudience.Private
+public class MvccVersionDecoder {
+
+ protected PrefixTreeBlockMeta blockMeta;
+ protected byte[] block;
+
+
+ /************** construct ***********************/
+
+ public MvccVersionDecoder() {
+ }
+
+ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
+ this.block = block;
+ this.blockMeta = blockMeta;
+ }
+
+
+ /************** methods *************************/
+
+ public long getMvccVersion(int index) {
+ if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical
+ return blockMeta.getMinMvccVersion();
+ }
+ int startIndex = blockMeta.getAbsoluteMvccVersionOffset()
+ + blockMeta.getMvccVersionDeltaWidth() * index;
+ long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth());
+ return blockMeta.getMinMvccVersion() + delta;
+ }
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java
new file mode 100644
index 0000000..cb7f412
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+
+/**
+ * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block.
+ */
+@InterfaceAudience.Private
+public class TimestampDecoder {
+
+ protected PrefixTreeBlockMeta blockMeta;
+ protected byte[] block;
+
+
+ /************** construct ***********************/
+
+ public TimestampDecoder() {
+ }
+
+ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
+ this.block = block;
+ this.blockMeta = blockMeta;
+ }
+
+
+ /************** methods *************************/
+
+ public long getLong(int index) {
+ if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical
+ return blockMeta.getMinTimestamp();
+ }
+ int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth()
+ * index;
+ long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth());
+ return blockMeta.getMinTimestamp() + delta;
+ }
+}
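A worked example of the delta arithmetic in getLong() above, using illustrative numbers that are not taken from the patch (it also omits the all-same-timestamp shortcut): with deltas stored as 2-byte UFInts and the timestamp section starting at absolute offset 512, the cell whose timestamp index is 3 is decoded from the two bytes at offset 518 and added to the block's minimum timestamp.

  static long decodeTimestamp(PrefixTreeBlockMeta blockMeta, byte[] block, int timestampIndex) {
    int deltaWidth = blockMeta.getTimestampDeltaWidth();            // e.g. 2 bytes
    int sectionOffset = blockMeta.getAbsoluteTimestampOffset();     // e.g. 512
    int startIndex = sectionOffset + deltaWidth * timestampIndex;   // index 3 -> 512 + 2*3 = 518
    long delta = UFIntTool.fromBytes(block, startIndex, deltaWidth);// e.g. 42
    return blockMeta.getMinTimestamp() + delta;                     // e.g. 1000000 + 42 = 1000042
  }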
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderFactory.java
new file mode 100644
index 0000000..ba5340d
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderFactory.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode;
+
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Retrieve PrefixTreeEncoders from this factory, which handles pooling them and preparing the
+ * retrieved instances for use.
+ */
+@InterfaceAudience.Private
+public class EncoderFactory {
+
+ private static final EncoderPool POOL = new ThreadLocalEncoderPool();
+
+
+ public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
+ return POOL.checkOut(outputStream, includeMvccVersion);
+ }
+
+ public static void checkIn(PrefixTreeEncoder encoder) {
+ POOL.checkIn(encoder);
+ }
+
+
+ /**************************** helper ******************************/
+
+ protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder,
+ OutputStream outputStream, boolean includeMvccVersion) {
+ PrefixTreeEncoder ret = encoder;
+ if (encoder == null) {
+ ret = new PrefixTreeEncoder(outputStream, includeMvccVersion);
+ }
+ ret.reset(outputStream, includeMvccVersion);
+ return ret;
+ }
+
+}
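A minimal sketch of the intended check-out/check-in cycle (the output stream, sorted cell iterable, and mvcc flag are assumed to come from the caller); the encoder should always be checked back in after flush() so its internal buffers can be reused.

  static void encodeBlock(OutputStream outputStream, Iterable<Cell> cellsInSortedOrder,
      boolean includeMvccVersion) throws IOException {
    PrefixTreeEncoder encoder = EncoderFactory.checkOut(outputStream, includeMvccVersion);
    try {
      for (Cell cell : cellsInSortedOrder) {
        encoder.write(cell);// append cells in sorted order
      }
      encoder.flush();
    } finally {
      EncoderFactory.checkIn(encoder);// return the encoder to the pool for reuse
    }
  }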
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderPool.java
new file mode 100644
index 0000000..3b9df91
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderPool.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode;
+
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+
+@InterfaceAudience.Private
+public interface EncoderPool {
+
+ PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion);
+ void checkIn(PrefixTreeEncoder encoder);
+
+}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
new file mode 100644
index 0000000..7817c38
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
@@ -0,0 +1,494 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
+import org.apache.hadoop.hbase.io.CellOutputStream;
+import org.apache.hadoop.hbase.util.ArrayUtils;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
+import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;
+import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+import org.apache.hadoop.io.WritableUtils;
+
+/**
+ * This is the primary class for converting a stream of Cells into an encoded byte[]. As Cells are
+ * added they are completely copied into the various encoding structures. This is important because
+ * the Cells being fed in during compactions are usually transient.
+ *
+ * Usage:
+ * 1) constructor
+ * 2) append cells in sorted order: write(Cell cell)
+ * 3) flush()
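+ *
+ * A minimal sketch of that sequence (assuming the Cells arrive already sorted):
+ * <pre>
+ * PrefixTreeEncoder encoder = new PrefixTreeEncoder(out, includeMvccVersion);
+ * for (Cell cell : sortedCells) {
+ *   encoder.write(cell);
+ * }
+ * encoder.flush();
+ * </pre>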
+ */
+@InterfaceAudience.Private
+public class PrefixTreeEncoder implements CellOutputStream {
+
+ /**************** static ************************/
+
+ protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);
+
+  // future-proof for when HBase supports multiple families in a data block.
+ public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;
+
+ private static final boolean USE_HASH_COLUMN_SORTER = true;
+ private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
+ private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;
+
+
+ /**************** fields *************************/
+
+ protected long numResets = 0L;
+
+ protected OutputStream outputStream;
+
+ /*
+ * Cannot change during a single block's encoding. If false, then substitute incoming Cell's
+ * mvccVersion with zero and write out the block as usual.
+ */
+ protected boolean includeMvccVersion;
+
+ /*
+ * reusable ByteRanges used for communicating with the sorters/compilers
+ */
+ protected ByteRange rowRange;
+ protected ByteRange familyRange;
+ protected ByteRange qualifierRange;
+
+ /*
+ * incoming Cell fields are copied into these arrays
+ */
+ protected long[] timestamps;
+ protected long[] mvccVersions;
+ protected byte[] typeBytes;
+ protected int[] valueOffsets;
+ protected byte[] values;
+
+ protected PrefixTreeBlockMeta blockMeta;
+
+ /*
+ * Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and
+ * compile before flushing.
+ */
+ protected LongEncoder timestampEncoder;
+ protected LongEncoder mvccVersionEncoder;
+ protected CellTypeEncoder cellTypeEncoder;
+
+ /*
+ * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
+ * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
+ * comparing only with the previous row key, families and qualifiers can arrive in unsorted order
+ * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
+ */
+ protected ByteRangeSet familyDeduplicator;
+ protected ByteRangeSet qualifierDeduplicator;
+
+ /*
+ * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
+ * trie structure with nodes connected by memory pointers (not serializable yet).
+ */
+ protected Tokenizer rowTokenizer;
+ protected Tokenizer familyTokenizer;
+ protected Tokenizer qualifierTokenizer;
+
+ /*
+ * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
+ * all information to an output stream of bytes that can be stored on disk.
+ */
+ protected RowSectionWriter rowWriter;
+ protected ColumnSectionWriter familyWriter;
+ protected ColumnSectionWriter qualifierWriter;
+
+ /*
+ * Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
+ * were full KeyValues because some parts of HBase like to know the "unencoded size".
+ */
+ protected int totalCells = 0;
+ protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
+ protected int totalValueBytes = 0;
+ protected int maxValueLength = 0;
+  protected int totalBytes = 0;// total size of the encoded sections plus the block meta
+
+
+ /***************** construct ***********************/
+
+ public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
+ // used during cell accumulation
+ this.blockMeta = new PrefixTreeBlockMeta();
+ this.rowRange = new ByteRange();
+ this.familyRange = new ByteRange();
+ this.qualifierRange = new ByteRange();
+ this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
+ this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
+ this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
+ this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
+ this.values = new byte[VALUE_BUFFER_INIT_SIZE];
+
+ // used during compilation
+ this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
+ : new ByteRangeTreeSet();
+ this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
+ : new ByteRangeTreeSet();
+ this.timestampEncoder = new LongEncoder();
+ this.mvccVersionEncoder = new LongEncoder();
+ this.cellTypeEncoder = new CellTypeEncoder();
+ this.rowTokenizer = new Tokenizer();
+ this.familyTokenizer = new Tokenizer();
+ this.qualifierTokenizer = new Tokenizer();
+ this.rowWriter = new RowSectionWriter();
+ this.familyWriter = new ColumnSectionWriter();
+ this.qualifierWriter = new ColumnSectionWriter();
+
+ reset(outputStream, includeMvccVersion);
+ }
+
+ public void reset(OutputStream outputStream, boolean includeMvccVersion) {
+ ++numResets;
+ this.includeMvccVersion = includeMvccVersion;
+ this.outputStream = outputStream;
+ valueOffsets[0] = 0;
+
+ familyDeduplicator.reset();
+ qualifierDeduplicator.reset();
+ rowTokenizer.reset();
+ timestampEncoder.reset();
+ mvccVersionEncoder.reset();
+ cellTypeEncoder.reset();
+ familyTokenizer.reset();
+ qualifierTokenizer.reset();
+ rowWriter.reset();
+ familyWriter.reset();
+ qualifierWriter.reset();
+
+ totalCells = 0;
+ totalUnencodedBytes = 0;
+ totalValueBytes = 0;
+ maxValueLength = 0;
+ totalBytes = 0;
+ }
+
+ /**
+ * Check that the arrays used to hold cell fragments are large enough for the cell that is being
+ * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
+ * first few block encodings but should stabilize quickly.
+ */
+ protected void ensurePerCellCapacities() {
+ int currentCapacity = valueOffsets.length;
+ int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
+ if (neededCapacity < currentCapacity) {
+ return;
+ }
+
+ int padding = neededCapacity;//this will double the array size
+ timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
+ mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
+ typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
+ valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
+ }
+
+ /******************** CellOutputStream methods *************************/
+
+ /**
+ * Note: Unused until support is added to the scanner/heap
+ *
+   * The following methods are optimized versions of write(Cell cell). The result should be
+   * identical; however, the implementation may be able to execute them much more efficiently
+   * because it does not need to compare the unchanged fields with the previous cell's.
+ *
+ * Consider the benefits during compaction when paired with a CellScanner that is also aware of
+ * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
+ * to the write(Cell cell) method.
+ *
+ * The savings of skipping duplicate row detection are significant with long row keys. A
+ * DataBlockEncoder may store a row key once in combination with a count of how many cells are in
+   * the row. With a 100 byte row key, we can replace a 100-byte comparison with a single
+   * increment of the counter, for every cell in the row.
+ */
+
+ /**
+ * Add a Cell to the output stream but repeat the previous row.
+ */
+ //@Override
+ public void writeWithRepeatRow(Cell cell) {
+ ensurePerCellCapacities();//can we optimize away some of this?
+
+ //save a relatively expensive row comparison, incrementing the row's counter instead
+ rowTokenizer.incrementNumOccurrencesOfLatestValue();
+ addFamilyPart(cell);
+ addQualifierPart(cell);
+ addAfterRowFamilyQualifier(cell);
+ }
+
+
+ @Override
+ public void write(Cell cell) {
+ ensurePerCellCapacities();
+
+ rowTokenizer.addSorted(CellUtil.fillRowRange(cell, rowRange));
+ addFamilyPart(cell);
+ addQualifierPart(cell);
+ addAfterRowFamilyQualifier(cell);
+ }
+
+
+ /***************** internal add methods ************************/
+
+ private void addAfterRowFamilyQualifier(Cell cell){
+ // timestamps
+ timestamps[totalCells] = cell.getTimestamp();
+ timestampEncoder.add(cell.getTimestamp());
+
+ // memstore timestamps
+ if (includeMvccVersion) {
+ mvccVersions[totalCells] = cell.getMvccVersion();
+ mvccVersionEncoder.add(cell.getMvccVersion());
+ totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion());
+ }else{
+ //must overwrite in case there was a previous version in this array slot
+ mvccVersions[totalCells] = 0L;
+ if(totalCells == 0){//only need to do this for the first cell added
+ mvccVersionEncoder.add(0L);
+ }
+ //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
+ }
+
+ // types
+ typeBytes[totalCells] = cell.getTypeByte();
+ cellTypeEncoder.add(cell.getTypeByte());
+
+ // values
+ totalValueBytes += cell.getValueLength();
+ // double the array each time we run out of space
+ values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
+ CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]);
+ if (cell.getValueLength() > maxValueLength) {
+ maxValueLength = cell.getValueLength();
+ }
+ valueOffsets[totalCells + 1] = totalValueBytes;
+
+ // general
+ totalUnencodedBytes += KeyValueUtil.length(cell);
+ ++totalCells;
+ }
+
+ private void addFamilyPart(Cell cell) {
+ if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
+ CellUtil.fillFamilyRange(cell, familyRange);
+ familyDeduplicator.add(familyRange);
+ }
+ }
+
+ private void addQualifierPart(Cell cell) {
+ CellUtil.fillQualifierRange(cell, qualifierRange);
+ qualifierDeduplicator.add(qualifierRange);
+ }
+
+
+ /****************** compiling/flushing ********************/
+
+ /**
+ * Expensive method. The second half of the encoding work happens here.
+ *
+ * Take all the separate accumulated data structures and turn them into a single stream of bytes
+ * which is written to the outputStream.
+ */
+ @Override
+ public void flush() throws IOException {
+ compile();
+
+ // do the actual flushing to the output stream. Order matters.
+ blockMeta.writeVariableBytesToOutputStream(outputStream);
+ rowWriter.writeBytes(outputStream);
+ familyWriter.writeBytes(outputStream);
+ qualifierWriter.writeBytes(outputStream);
+ timestampEncoder.writeBytes(outputStream);
+ mvccVersionEncoder.writeBytes(outputStream);
+ //CellType bytes are in the row nodes. there is no additional type section
+ outputStream.write(values, 0, totalValueBytes);
+ }
+
+ /**
+ * Now that all the cells have been added, do the work to reduce them to a series of byte[]
+ * fragments that are ready to be written to the output stream.
+ */
+ protected void compile(){
+ blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
+ int lastValueOffset = valueOffsets[totalCells];
+ blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
+ blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
+ blockMeta.setNumValueBytes(totalValueBytes);
+ totalBytes += totalValueBytes;
+
+ //these compile methods will add to totalBytes
+ compileTypes();
+ compileMvccVersions();
+ compileTimestamps();
+ compileQualifiers();
+ compileFamilies();
+ compileRows();
+
+ int numMetaBytes = blockMeta.calculateNumMetaBytes();
+ blockMeta.setNumMetaBytes(numMetaBytes);
+ totalBytes += numMetaBytes;
+ }
+
+ /**
+ * The following "compile" methods do any intermediate work necessary to transform the cell
+ * fragments collected during the writing phase into structures that are ready to write to the
+ * outputStream.
+ *
+ * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
+ */
+
+ protected void compileTypes() {
+ blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
+ if(cellTypeEncoder.areAllSameType()){
+ blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
+ }
+ }
+
+ protected void compileMvccVersions() {
+ mvccVersionEncoder.compile();
+ blockMeta.setMvccVersionFields(mvccVersionEncoder);
+ int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
+ totalBytes += numMvccVersionBytes;
+ }
+
+ protected void compileTimestamps() {
+ timestampEncoder.compile();
+ blockMeta.setTimestampFields(timestampEncoder);
+ int numTimestampBytes = timestampEncoder.getOutputArrayLength();
+ totalBytes += numTimestampBytes;
+ }
+
+ protected void compileQualifiers() {
+ blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
+ qualifierDeduplicator.compile();
+ qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
+ qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false);
+ qualifierWriter.compile();
+ int numQualifierBytes = qualifierWriter.getNumBytes();
+ blockMeta.setNumQualifierBytes(numQualifierBytes);
+ totalBytes += numQualifierBytes;
+ }
+
+ protected void compileFamilies() {
+ blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
+ familyDeduplicator.compile();
+ familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
+ familyWriter.reconstruct(blockMeta, familyTokenizer, true);
+ familyWriter.compile();
+ int numFamilyBytes = familyWriter.getNumBytes();
+ blockMeta.setNumFamilyBytes(numFamilyBytes);
+ totalBytes += numFamilyBytes;
+ }
+
+ protected void compileRows() {
+ rowWriter.reconstruct(this);
+ rowWriter.compile();
+ int numRowBytes = rowWriter.getNumBytes();
+ blockMeta.setNumRowBytes(numRowBytes);
+ blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
+ totalBytes += numRowBytes;
+ }
+
+ /********************* convenience getters ********************************/
+
+ public long getValueOffset(int index) {
+ return valueOffsets[index];
+ }
+
+ public int getValueLength(int index) {
+ return (int) (valueOffsets[index + 1] - valueOffsets[index]);
+ }
+
+ /************************* get/set *************************************/
+
+ public PrefixTreeBlockMeta getBlockMeta() {
+ return blockMeta;
+ }
+
+ public Tokenizer getRowTokenizer() {
+ return rowTokenizer;
+ }
+
+ public LongEncoder getTimestampEncoder() {
+ return timestampEncoder;
+ }
+
+ public int getTotalBytes() {
+ return totalBytes;
+ }
+
+ public long[] getTimestamps() {
+ return timestamps;
+ }
+
+ public long[] getMvccVersions() {
+ return mvccVersions;
+ }
+
+ public byte[] getTypeBytes() {
+ return typeBytes;
+ }
+
+ public LongEncoder getMvccVersionEncoder() {
+ return mvccVersionEncoder;
+ }
+
+ public ByteRangeSet getFamilySorter() {
+ return familyDeduplicator;
+ }
+
+ public ByteRangeSet getQualifierSorter() {
+ return qualifierDeduplicator;
+ }
+
+ public ColumnSectionWriter getFamilyWriter() {
+ return familyWriter;
+ }
+
+ public ColumnSectionWriter getQualifierWriter() {
+ return qualifierWriter;
+ }
+
+ public RowSectionWriter getRowWriter() {
+ return rowWriter;
+ }
+
+ public ByteRange getValueByteRange() {
+ return new ByteRange(values, 0, totalValueBytes);
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java
new file mode 100644
index 0000000..6cbe0c2
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode;
+
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+
+/**
+ * Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and
+ * would be more garbage than the data in the block. A new encoder is needed for each block in
+ * a flush, compaction, RPC response, etc.
+ *
+ * It is not a pool in the traditional sense, but implements the semantics of a traditional pool
+ * via ThreadLocals to avoid sharing between threads. Sharing between threads would not be
+ * very expensive given that it's accessed per-block, but this is just as easy.
+ *
+ * This pool implementation assumes there is a one-to-one mapping between a single thread and a
+ * single flush or compaction.
+ */
+@InterfaceAudience.Private
+public class ThreadLocalEncoderPool implements EncoderPool{
+
+  private static final ThreadLocal<PrefixTreeEncoder> ENCODER
+      = new ThreadLocal<PrefixTreeEncoder>();
+
+ /**
+ * Get the encoder attached to the current ThreadLocal, or create a new one and attach it to the
+ * current thread.
+ */
+ @Override
+ public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) {
+ PrefixTreeEncoder builder = ENCODER.get();
+ builder = EncoderFactory.prepareEncoder(builder, os, includeMvccVersion);
+ ENCODER.set(builder);
+ return builder;
+ }
+
+ @Override
+ public void checkIn(PrefixTreeEncoder encoder) {
+ // attached to thread on checkOut, so shouldn't need to do anything here
+
+ // do we need to worry about detaching encoders from compaction threads or are the same threads
+ // used over and over
+ }
+
+}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
new file mode 100644
index 0000000..0105f8f
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Strings;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+import org.apache.hadoop.hbase.util.vint.UVIntTool;
+
+/**
+ * Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly.
+ * The family and qualifier sections of the data block are made of 1 or more of these nodes.
+ *
+ * Each node is composed of 3 sections:
+ * <li>tokenLength: UVInt (normally 1 byte) indicating the number of token bytes
+ * <li>token[]: the actual token bytes
+ * <li>parentStartPosition: the offset of the next node from the start of the family or qualifier
+ * section
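+ *
+ * For example (an illustration only, assuming a 1-byte parent offset width), a node whose token
+ * is the two bytes "am" and whose parent starts at offset 0 would serialize as:
+ * 0x02 'a' 'm' 0x00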
+ */
+@InterfaceAudience.Private
+public class ColumnNodeWriter{
+
+ /************* fields ****************************/
+
+ protected TokenizerNode builderNode;
+ protected PrefixTreeBlockMeta blockMeta;
+
+ protected boolean familyVsQualifier;
+
+ protected int tokenLength;
+ protected byte[] token;
+ protected int parentStartPosition;
+
+
+ /*************** construct **************************/
+
+ public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
+ boolean familyVsQualifier) {
+ this.blockMeta = blockMeta;
+ this.builderNode = builderNode;
+ this.familyVsQualifier = familyVsQualifier;
+ calculateTokenLength();
+ }
+
+
+ /************* methods *******************************/
+
+ public boolean isRoot() {
+ return parentStartPosition == 0;
+ }
+
+ private void calculateTokenLength() {
+ tokenLength = builderNode.getTokenLength();
+ token = new byte[tokenLength];
+ }
+
+ /**
+ * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
+ * placeholder.
+ * @param offsetWidthPlaceholder the placeholder
+ * @return node width
+ */
+ public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
+ int width = 0;
+ width += UVIntTool.numBytes(tokenLength);
+ width += token.length;
+ width += offsetWidthPlaceholder;
+ return width;
+ }
+
+ public void writeBytes(OutputStream os) throws IOException {
+ int parentOffsetWidth;
+ if (familyVsQualifier) {
+ parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
+ } else {
+ parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
+ }
+ UVIntTool.writeBytes(tokenLength, os);
+ os.write(token);
+ UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os);
+ }
+
+ public void setTokenBytes(ByteRange source) {
+ source.deepCopySubRangeTo(0, tokenLength, token, 0);
+ }
+
+
+ /****************** standard methods ************************/
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ",");
+ sb.append("[");
+ sb.append(Bytes.toString(token));
+ sb.append("]->");
+ sb.append(parentStartPosition);
+ return sb.toString();
+ }
+
+
+ /************************** get/set ***********************/
+
+ public void setParentStartPosition(int parentStartPosition) {
+ this.parentStartPosition = parentStartPosition;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
new file mode 100644
index 0000000..122ffb4
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.column;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
+ * section is written after the row section, and qualifier section after family section.
+ *
+ * The family and qualifier tries, or "column tries", are structured differently than the row trie.
+ * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
+ * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
+ * row, the row trie contains a list of offsets into the column sections (along with pointers to
+ * timestamps and other per-cell fields). These offsets point to the last column node/token that
+ * comprises the column name. To assemble the column name, the trie is traversed in reverse (right
+ * to left), with the rightmost tokens pointing to the start of their "parent" node which is the
+ * node to the left.
+ *
+ * This choice was made to reduce the size of the column trie by storing the minimum amount of
+ * offset data. As a result, to find a specific qualifier within a row, you must do a binary search
+ * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree
+ * might encode the columns in both a forward and a reverse trie, converting those binary searches
+ * into more efficient trie searches, which would be beneficial for wide rows.
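+ *
+ * As a small illustration, the qualifiers "abc" and "abd" tokenize into a parent node "ab" with
+ * child nodes "c" and "d"; each cell references the offset of its leaf node ("c" or "d"), and the
+ * full qualifier is reassembled by following parentStartPosition pointers back through "ab".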
+ */
+@InterfaceAudience.Private
+public class ColumnSectionWriter {
+
+ public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
+
+ /****************** fields ****************************/
+
+ private PrefixTreeBlockMeta blockMeta;
+
+ private boolean familyVsQualifier;
+ private Tokenizer tokenizer;
+ private int numBytes = 0;
+  private ArrayList<TokenizerNode> nonLeaves;
+  private ArrayList<TokenizerNode> leaves;
+  private ArrayList<TokenizerNode> allNodes;
+  private ArrayList<ColumnNodeWriter> columnNodeWriters;
+  private List<Integer> outputArrayOffsets;
+
+
+ /*********************** construct *********************/
+
+ public ColumnSectionWriter() {
+ this.nonLeaves = Lists.newArrayList();
+ this.leaves = Lists.newArrayList();
+ this.outputArrayOffsets = Lists.newArrayList();
+ }
+
+ public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
+ boolean familyVsQualifier) {
+ this();// init collections
+ reconstruct(blockMeta, builder, familyVsQualifier);
+ }
+
+ public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
+ boolean familyVsQualifier) {
+ this.blockMeta = blockMeta;
+ this.tokenizer = builder;
+ this.familyVsQualifier = familyVsQualifier;
+ }
+
+ public void reset() {
+ numBytes = 0;
+ nonLeaves.clear();
+ leaves.clear();
+ outputArrayOffsets.clear();
+ }
+
+
+ /****************** methods *******************************/
+
+ public ColumnSectionWriter compile() {
+ if (familyVsQualifier) {
+ // do nothing. max family length fixed at Byte.MAX_VALUE
+ } else {
+ blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
+ }
+
+ tokenizer.setNodeFirstInsertionIndexes();
+
+ tokenizer.appendNodes(nonLeaves, true, false);
+
+ tokenizer.appendNodes(leaves, false, true);
+
+ allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
+ allNodes.addAll(nonLeaves);
+ allNodes.addAll(leaves);
+
+ columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
+ for (int i = 0; i < allNodes.size(); ++i) {
+ TokenizerNode node = allNodes.get(i);
+ columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier));
+ }
+
+ // leaf widths are known at this point, so add them up
+ int totalBytesWithoutOffsets = 0;
+ for (int i = allNodes.size() - 1; i >= 0; --i) {
+ ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
+ // leaves store all but their first token byte
+ totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
+ }
+
+ // figure out how wide our offset FInts are
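+    // (for example, 300 bytes of nodes with 10 offsets: 1-byte offsets would need 310 bytes,
+    // more than the 255 a single byte can address, so 2-byte offsets would be chosen instead)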
+ int parentOffsetWidth = 0;
+ while (true) {
+ ++parentOffsetWidth;
+ int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
+ if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
+ numBytes = numBytesFinder;
+ break;
+ }// it fits
+ }
+ if (familyVsQualifier) {
+ blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
+ } else {
+ blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
+ }
+
+ int forwardIndex = 0;
+ for (int i = 0; i < allNodes.size(); ++i) {
+ TokenizerNode node = allNodes.get(i);
+ ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
+ int fullNodeWidth = columnNodeWriter
+ .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
+ node.setOutputArrayOffset(forwardIndex);
+ columnNodeWriter.setTokenBytes(node.getToken());
+ if (node.isRoot()) {
+ columnNodeWriter.setParentStartPosition(0);
+ } else {
+ columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
+ }
+ forwardIndex += fullNodeWidth;
+ }
+
+ tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
+
+ return this;
+ }
+
+ public void writeBytes(OutputStream os) throws IOException {
+ for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
+ columnNodeWriter.writeBytes(os);
+ }
+ }
+
+
+ /************* get/set **************************/
+
+  public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
+ return columnNodeWriters;
+ }
+
+ public int getNumBytes() {
+ return numBytes;
+ }
+
+ public int getOutputArrayOffset(int sortedIndex) {
+ return outputArrayOffsets.get(sortedIndex);
+ }
+
+  public ArrayList<TokenizerNode> getNonLeaves() {
+    return nonLeaves;
+  }
+
+  public ArrayList<TokenizerNode> getLeaves() {
+    return leaves;
+  }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
new file mode 100644
index 0000000..c8d6707
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Detects whether every KV has the same KeyValue.Type, in which case we don't need to store it for
+ * each KV. If all cells share the same type, then during conversion to byte[] we can store the
+ * "onlyType" once in blockMeta instead of repeating it for each cell, saving 1 byte per cell.
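+ *
+ * For example, a block containing only Put cells stores the Put type byte once in blockMeta and
+ * skips the per-cell type section entirely.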
+ */
+@InterfaceAudience.Private
+public class CellTypeEncoder {
+
+ /************* fields *********************/
+
+ protected boolean pendingFirstType = true;
+ protected boolean allSameType = true;
+ protected byte onlyType;
+
+
+ /************* construct *********************/
+
+ public void reset() {
+ pendingFirstType = true;
+ allSameType = true;
+ }
+
+
+ /************* methods *************************/
+
+ public void add(byte type) {
+ if (pendingFirstType) {
+ onlyType = type;
+ pendingFirstType = false;
+ } else if (onlyType != type) {
+ allSameType = false;
+ }
+ }
+
+
+ /**************** get/set **************************/
+
+ public boolean areAllSameType() {
+ return allSameType;
+ }
+
+ public byte getOnlyType() {
+ return onlyType;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java
new file mode 100644
index 0000000..553d6cb
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.ArrayUtils;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+
+import com.google.common.base.Joiner;
+
+/**
+ * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
+ * collection of Cells.
+ *
+ * 1. add longs to a HashSet for fast de-duplication
+ * 2. keep track of the min and max
+ * 3. copy all values to a new long[]
+ * 4. Arrays.sort the long[]
+ * 5. calculate maxDelta = max - min
+ * 6. determine FInt width based on maxDelta
+ * 7. PrefixTreeEncoder binary searches to find index of each value
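+ *
+ * For example (illustrative numbers only): timestamps {1055, 1055, 1057, 1070} de-duplicate to
+ * the sorted set {1055, 1057, 1070}; min=1055 and maxDelta=15, so each value is stored as a
+ * 1-byte delta {0, 2, 15} and individual cells reference the deltas by index.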
+ */
+@InterfaceAudience.Private
+public class LongEncoder {
+
+ /****************** fields ****************************/
+
+  protected HashSet<Long> uniqueValues;
+ protected long[] sortedUniqueValues;
+ protected long min, max, maxDelta;
+
+ protected int bytesPerDelta;
+ protected int bytesPerIndex;
+ protected int totalCompressedBytes;
+
+
+ /****************** construct ****************************/
+
+ public LongEncoder() {
+    this.uniqueValues = new HashSet<Long>();
+ }
+
+ public void reset() {
+ uniqueValues.clear();
+ sortedUniqueValues = null;
+ min = Long.MAX_VALUE;
+ max = Long.MIN_VALUE;
+ maxDelta = Long.MIN_VALUE;
+ bytesPerIndex = 0;
+ bytesPerDelta = 0;
+ totalCompressedBytes = 0;
+ }
+
+
+ /************* methods ***************************/
+
+ public void add(long timestamp) {
+ uniqueValues.add(timestamp);
+ }
+
+ public LongEncoder compile() {
+ int numUnique = uniqueValues.size();
+ if (numUnique == 1) {
+ min = CollectionUtils.getFirst(uniqueValues);
+ sortedUniqueValues = new long[] { min };
+ return this;
+ }
+
+ sortedUniqueValues = new long[numUnique];
+ int lastIndex = -1;
+ for (long value : uniqueValues) {
+ sortedUniqueValues[++lastIndex] = value;
+ }
+ Arrays.sort(sortedUniqueValues);
+ min = ArrayUtils.getFirst(sortedUniqueValues);
+ max = ArrayUtils.getLast(sortedUniqueValues);
+ maxDelta = max - min;
+ if (maxDelta > 0) {
+ bytesPerDelta = UFIntTool.numBytes(maxDelta);
+ } else {
+ bytesPerDelta = 0;
+ }
+
+ int maxIndex = numUnique - 1;
+ bytesPerIndex = UFIntTool.numBytes(maxIndex);
+
+ totalCompressedBytes = numUnique * bytesPerDelta;
+
+ return this;
+ }
+
+ public long getDelta(int index) {
+ if (sortedUniqueValues.length == 0) {
+ return 0;
+ }
+ return sortedUniqueValues[index] - min;
+ }
+
+ public int getIndex(long value) {
+ // should always find an exact match
+ return Arrays.binarySearch(sortedUniqueValues, value);
+ }
+
+ public void writeBytes(OutputStream os) throws IOException {
+ for (int i = 0; i < sortedUniqueValues.length; ++i) {
+ long delta = sortedUniqueValues[i] - min;
+ UFIntTool.writeBytes(bytesPerDelta, delta, os);
+ }
+ }
+
+ //convenience method for tests
+ public byte[] getByteArray() throws IOException{
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ writeBytes(baos);
+ return baos.toByteArray();
+ }
+
+ public int getOutputArrayLength() {
+ return sortedUniqueValues.length * bytesPerDelta;
+ }
+
+ public int getNumUniqueValues() {
+ return sortedUniqueValues.length;
+ }
+
+
+ /******************* Object methods **********************/
+
+ @Override
+ public String toString() {
+ if (ArrayUtils.isEmpty(sortedUniqueValues)) {
+ return "[]";
+ }
+ return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
+ }
+
+
+ /******************** get/set **************************/
+
+ public long getMin() {
+ return min;
+ }
+
+ public int getBytesPerDelta() {
+ return bytesPerDelta;
+ }
+
+ public int getBytesPerIndex() {
+ return bytesPerIndex;
+ }
+
+ public int getTotalCompressedBytes() {
+ return totalCompressedBytes;
+ }
+
+ public long[] getSortedUniqueTimestamps() {
+ return sortedUniqueValues;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
new file mode 100644
index 0000000..29ebafa
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
+import org.apache.hadoop.hbase.util.ByteRangeTool;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+import org.apache.hadoop.hbase.util.vint.UVIntTool;
+
+/**
+ * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
+ * Please see the write() method for the order in which data is written.
+ */
+@InterfaceAudience.Private
+public class RowNodeWriter{
+ protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);
+
+ /********************* fields ******************************/
+
+ protected PrefixTreeEncoder prefixTreeEncoder;
+ protected PrefixTreeBlockMeta blockMeta;
+ protected TokenizerNode tokenizerNode;
+
+ protected int tokenWidth;
+ protected int fanOut;
+ protected int numCells;
+
+ protected int width;
+
+
+ /*********************** construct *************************/
+
+ public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
+ reconstruct(keyValueBuilder, tokenizerNode);
+ }
+
+ public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
+ this.prefixTreeEncoder = prefixTreeEncoder;
+ reset(tokenizerNode);
+ }
+
+ public void reset(TokenizerNode node) {
+ this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
+ this.tokenizerNode = node;
+ this.tokenWidth = 0;
+ this.fanOut = 0;
+ this.numCells = 0;
+ this.width = 0;
+ calculateOffsetsAndLengths();
+ }
+
+
+ /********************* methods ****************************/
+
+ protected void calculateOffsetsAndLengths(){
+ tokenWidth = tokenizerNode.getTokenLength();
+ if(!tokenizerNode.isRoot()){
+      --tokenWidth;// non-root nodes store all but their first token byte, which lives in the parent's fan
+ }
+ fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
+ numCells = tokenizerNode.getNumOccurrences();
+ }
+
+ public int calculateWidth(){
+ calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
+ return width;
+ }
+
+ public int calculateWidthOverrideOffsetWidth(int offsetWidth){
+ width = 0;
+ width += UVIntTool.numBytes(tokenWidth);
+ width += tokenWidth;
+
+ width += UVIntTool.numBytes(fanOut);
+ width += fanOut;
+
+ width += UVIntTool.numBytes(numCells);
+
+ if(tokenizerNode.hasOccurrences()){
+ int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
+ + blockMeta.getQualifierOffsetWidth()
+ + blockMeta.getTimestampIndexWidth()
+ + blockMeta.getMvccVersionIndexWidth()
+ + blockMeta.getKeyValueTypeWidth()
+ + blockMeta.getValueOffsetWidth()
+ + blockMeta.getValueLengthWidth();
+ width += numCells * fixedBytesPerCell;
+ }
+
+ if( ! tokenizerNode.isLeaf()){
+ width += fanOut * offsetWidth;
+ }
+
+ return width;
+ }
+
+
+ /*********************** writing the compiled structure to the OutputStream ***************/
+
+ public void write(OutputStream os) throws IOException{
+ //info about this row trie node
+ writeRowToken(os);
+ writeFan(os);
+ writeNumCells(os);
+
+ //UFInt indexes and offsets for each cell in the row (if nub or leaf)
+ writeFamilyNodeOffsets(os);
+ writeQualifierNodeOffsets(os);
+ writeTimestampIndexes(os);
+ writeMvccVersionIndexes(os);
+ writeCellTypes(os);
+ writeValueOffsets(os);
+ writeValueLengths(os);
+
+ //offsets to the children of this row trie node (if branch or nub)
+ writeNextRowTrieNodeOffsets(os);
+ }
+
+
+ /**
+ * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
+ * fields can reproduce all the row keys that compose the block.
+ */
+
+ /**
+ * UVInt: tokenWidth
+ * bytes: token
+ */
+ protected void writeRowToken(OutputStream os) throws IOException {
+ UVIntTool.writeBytes(tokenWidth, os);
+ int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
+ ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex);
+ }
+
+ /**
+ * UVInt: numFanBytes/fanOut
+ * bytes: each fan byte
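+   *
+   * For example, a node whose children's tokens begin with 'a', 'b', and 'd' writes the UVInt 3
+   * followed by the bytes 'a', 'b', 'd'.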
+ */
+ public void writeFan(OutputStream os) throws IOException {
+ UVIntTool.writeBytes(fanOut, os);
+ if (fanOut <= 0) {
+ return;
+ }
+    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ os.write(child.getToken().get(0));// first byte of each child's token
+ }
+ }
+
+ /**
+ * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
+ */
+ protected void writeNumCells(OutputStream os) throws IOException {
+ UVIntTool.writeBytes(numCells, os);
+ }
+
+
+ /**
+ * The following methods write data for each cell in the row, mostly consisting of indexes or
+ * offsets into the timestamp/column data structures that are written in the middle of the block.
+ * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
+ * search of a particular column/timestamp combination.
+ *
+ * Branch nodes will not have any data in these sections.
+ */
+
+ protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
+ if (blockMeta.getFamilyOffsetWidth() <= 0) {
+ return;
+ }
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
+ .getFirstInsertionIndex() + i : 0;
+ int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
+ cellInsertionIndex);
+ int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
+ sortedIndex);
+ UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
+ }
+ }
+
+ protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
+ if (blockMeta.getQualifierOffsetWidth() <= 0) {
+ return;
+ }
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
+ int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
+ cellInsertionIndex);
+ int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
+ sortedIndex);
+ UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
+ }
+ }
+
+ protected void writeTimestampIndexes(OutputStream os) throws IOException {
+ if (blockMeta.getTimestampIndexWidth() <= 0) {
+ return;
+ }
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
+ long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
+ int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
+ UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
+ }
+ }
+
+ protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
+ if (blockMeta.getMvccVersionIndexWidth() <= 0) {
+ return;
+ }
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
+ long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
+ int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
+ UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
+ }
+ }
+
+ protected void writeCellTypes(OutputStream os) throws IOException {
+ if (blockMeta.isAllSameType()) {
+ return;
+ }
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
+ os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
+ }
+ }
+
+ protected void writeValueOffsets(OutputStream os) throws IOException {
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
+ long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
+ UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
+ }
+ }
+
+ protected void writeValueLengths(OutputStream os) throws IOException {
+ for (int i = 0; i < numCells; ++i) {
+ int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
+ int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
+ UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
+ }
+ }
+
+
+ /**
+   * If a branch or a nub, the last things we append are the UFInt offsets to the child row nodes.
+ */
+ protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
+    ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
+ UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
+ }
+ }
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
new file mode 100644
index 0000000..f5d4eba
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.row;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
+import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
+import org.apache.hadoop.hbase.util.vint.UFIntTool;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
+ * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
+ * also contains references to offsets in the other sections of the data block that enable the
+ * decoder to match a row key with its qualifier, timestamp, type, value, etc.
+ *
+ * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
+ * internals of each row node.
+ */
+@InterfaceAudience.Private
+public class RowSectionWriter {
+
+ /***************** fields **************************/
+
+ protected PrefixTreeEncoder prefixTreeEncoder;
+
+ protected PrefixTreeBlockMeta blockMeta;
+
+ protected int numBytes;
+
+  protected ArrayList<TokenizerNode> nonLeaves;
+  protected ArrayList<TokenizerNode> leaves;
+
+  protected ArrayList<RowNodeWriter> leafWriters;
+  protected ArrayList<RowNodeWriter> nonLeafWriters;
+
+ protected int numLeafWriters;
+ protected int numNonLeafWriters;
+
+
+ /********************* construct **********************/
+
+ public RowSectionWriter() {
+ this.nonLeaves = Lists.newArrayList();
+ this.leaves = Lists.newArrayList();
+ this.leafWriters = Lists.newArrayList();
+ this.nonLeafWriters = Lists.newArrayList();
+ }
+
+ public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
+ reconstruct(prefixTreeEncoder);
+ }
+
+ public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
+ this.prefixTreeEncoder = prefixTreeEncoder;
+ this.blockMeta = prefixTreeEncoder.getBlockMeta();
+ reset();
+ }
+
+ public void reset() {
+ numBytes = 0;
+ nonLeaves.clear();
+ leaves.clear();
+ numLeafWriters = 0;
+ numNonLeafWriters = 0;
+ }
+
+
+ /****************** methods *******************************/
+
+ public RowSectionWriter compile() {
+ blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
+ prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
+
+ prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
+ prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
+
+ // track the starting position of each node in final output
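+    // (negativeIndex counts backwards from the end of the row section; a child's offset is later
+    // derived as parent.getNegativeIndex() - child.getNegativeIndex() in RowNodeWriter)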
+ int negativeIndex = 0;
+
+ // create leaf writer nodes
+ // leaf widths are known at this point, so add them up
+ int totalLeafBytes = 0;
+ for (int i = leaves.size() - 1; i >= 0; --i) {
+ TokenizerNode leaf = leaves.get(i);
+ RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
+ ++numLeafWriters;
+ // leaves store all but their first token byte
+ int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
+ totalLeafBytes += leafNodeWidth;
+ negativeIndex += leafNodeWidth;
+ leaf.setNegativeIndex(negativeIndex);
+ }
+
+ int totalNonLeafBytesWithoutOffsets = 0;
+ int totalChildPointers = 0;
+ for (int i = nonLeaves.size() - 1; i >= 0; --i) {
+ TokenizerNode nonLeaf = nonLeaves.get(i);
+ RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
+ ++numNonLeafWriters;
+ totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
+ totalChildPointers += nonLeaf.getNumChildren();
+ }
+
+ // figure out how wide our offset FInts are
+ int offsetWidth = 0;
+ while (true) {
+ ++offsetWidth;
+ int offsetBytes = totalChildPointers * offsetWidth;
+ int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
+ if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
+ // it fits
+ numBytes = totalRowBytes;
+ break;
+ }
+ }
+ blockMeta.setNextNodeOffsetWidth(offsetWidth);
+
+ // populate negativeIndexes
+ for (int i = nonLeaves.size() - 1; i >= 0; --i) {
+ TokenizerNode nonLeaf = nonLeaves.get(i);
+ int writerIndex = nonLeaves.size() - i - 1;
+ RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
+ int nodeWidth = nonLeafWriter.calculateWidth();
+ negativeIndex += nodeWidth;
+ nonLeaf.setNegativeIndex(negativeIndex);
+ }
+
+ return this;
+ }
+
+  protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
+ TokenizerNode builderNode) {
+ RowNodeWriter rowNodeWriter = null;
+ //check if there is an existing node we can recycle
+ if (index >= list.size()) {
+ //there are not enough existing nodes, so add a new one which will be retrieved below
+ list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
+ }
+ rowNodeWriter = list.get(index);
+ rowNodeWriter.reset(builderNode);
+ return rowNodeWriter;
+ }
+
+
+ public void writeBytes(OutputStream os) throws IOException {
+ for (int i = numNonLeafWriters - 1; i >= 0; --i) {
+ RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
+ nonLeafWriter.write(os);
+ }
+ // duplicates above... written more for clarity right now
+ for (int i = numLeafWriters - 1; i >= 0; --i) {
+ RowNodeWriter leafWriter = leafWriters.get(i);
+ leafWriter.write(os);
+ }
+ }
+
+
+ /***************** static ******************************/
+
+  protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
+      ArrayList<TokenizerNode> ins, boolean leaves) {
+    ArrayList<TokenizerNode> outs = Lists.newArrayList();
+ for (int i = ins.size() - 1; i >= 0; --i) {
+ TokenizerNode n = ins.get(i);
+      if ((n.isLeaf() && leaves) || (!n.isLeaf() && !leaves)) {
+ outs.add(ins.get(i));
+ }
+ }
+ return outs;
+ }
+
+
+ /************* get/set **************************/
+
+ public int getNumBytes() {
+ return numBytes;
+ }
+
+  public ArrayList<TokenizerNode> getNonLeaves() {
+    return nonLeaves;
+  }
+
+  public ArrayList<TokenizerNode> getLeaves() {
+    return leaves;
+  }
+
+  public ArrayList<RowNodeWriter> getNonLeafWriters() {
+    return nonLeafWriters;
+  }
+
+  public ArrayList<RowNodeWriter> getLeafWriters() {
+    return leafWriters;
+  }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java
new file mode 100644
index 0000000..e10db3a
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
+
+import java.util.Comparator;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Determines the order of nodes in the output array. It may be possible to optimize this further.
+ */
+@InterfaceAudience.Private
+public class TokenDepthComparator implements Comparator<TokenizerNode> {
+
+ @Override
+ public int compare(TokenizerNode a, TokenizerNode b) {
+ if(a==null){
+ throw new IllegalArgumentException("a cannot be null");
+ }
+ if(b==null){
+ throw new IllegalArgumentException("b cannot be null");
+ }
+
+ // put leaves at the end
+ if (!a.isLeaf() && b.isLeaf()) {
+ return -1;
+ }
+ if (a.isLeaf() && !b.isLeaf()) {
+ return 1;
+ }
+
+ if (a.isLeaf() && b.isLeaf()) {// keep leaves in sorted order (for debuggability)
+ return a.getId() < b.getId() ? -1 : 1;
+ }
+
+ // compare depth
+ if (a.getTokenOffset() < b.getTokenOffset()) {
+ return -1;
+ }
+ if (a.getTokenOffset() > b.getTokenOffset()) {
+ return 1;
+ }
+
+ // if same depth, return lower id first. ids are unique
+ return a.getId() < b.getId() ? -1 : 1;
+ }
+
+}
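Usage sketch (not part of the patch): the comparator is intended to be handed to a standard sort so that branch/nub nodes come out shallowest-first and leaves are grouped at the end, which is the order the row section writer wants. It assumes a node list obtained from Tokenizer.getNodes(..), defined later in this patch; the class and method names below are illustrative.

    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenDepthComparator;
    import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;

    public class NodeOrderingSketch {
      // Orders nodes for output: non-leaves by increasing depth (then id), leaves last by id.
      static void sortForOutput(List<TokenizerNode> nodes) {
        Collections.sort(nodes, new TokenDepthComparator());
      }
    }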
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
new file mode 100644
index 0000000..a21bd12
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.ArrayUtils;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Data structure used in the first stage of PrefixTree encoding:
+ * <li>accepts a sorted stream of ByteRanges
+ * <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
+ * <li>connects the TokenizerNodes via standard java references
+ * <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
+ * <p>
+ * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
+ * encoding.
+ */
+@InterfaceAudience.Private
+public class Tokenizer{
+
+ /***************** fields **************************/
+
+ protected int numArraysAdded = 0;
+ protected long lastNodeId = -1;
+ protected ArrayList<TokenizerNode> nodes;
+ protected int numNodes;
+ protected TokenizerNode root;
+ protected byte[] tokens;
+ protected int tokensLength;
+
+ protected int maxElementLength = 0;
+ // number of levels in the tree assuming root level is 0
+ protected int treeDepth = 0;
+
+
+ /******************* construct *******************/
+
+ public Tokenizer() {
+ this.nodes = Lists.newArrayList();
+ this.tokens = new byte[0];
+ }
+
+ public void reset() {
+ numArraysAdded = 0;
+ lastNodeId = -1;
+ numNodes = 0;
+ tokensLength = 0;
+ root = null;
+ maxElementLength = 0;
+ treeDepth = 0;
+ }
+
+
+ /***************** building *************************/
+
+ public void addAll(ArrayList<ByteRange> sortedByteRanges) {
+ for (int i = 0; i < sortedByteRanges.size(); ++i) {
+ ByteRange byteRange = sortedByteRanges.get(i);
+ addSorted(byteRange);
+ }
+ }
+
+ public void addSorted(final ByteRange bytes) {
+ ++numArraysAdded;
+ if (bytes.getLength() > maxElementLength) {
+ maxElementLength = bytes.getLength();
+ }
+ if (root == null) {
+ // nodeDepth of firstNode (non-root) is 1
+ root = addNode(null, 1, 0, bytes, 0);
+ } else {
+ root.addSorted(bytes);
+ }
+ }
+
+ public void incrementNumOccurrencesOfLatestValue(){
+ CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
+ }
+
+ protected long nextNodeId() {
+ return ++lastNodeId;
+ }
+
+ protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
+ final ByteRange token, int inputTokenOffset) {
+ int inputTokenLength = token.getLength() - inputTokenOffset;
+ int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
+ TokenizerNode node = null;
+ if (nodes.size() <= numNodes) {
+ node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
+ inputTokenLength);
+ nodes.add(node);
+ } else {
+ node = nodes.get(numNodes);
+ node.reset();
+ node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
+ }
+ ++numNodes;
+ return node;
+ }
+
+ protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
+ int newOffset = tokensLength;
+ int inputTokenLength = token.getLength() - inputTokenOffset;
+ int newMinimum = tokensLength + inputTokenLength;
+ tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
+ token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
+ tokensLength += inputTokenLength;
+ return newOffset;
+ }
+
+ protected void submitMaxNodeDepthCandidate(int nodeDepth) {
+ if (nodeDepth > treeDepth) {
+ treeDepth = nodeDepth;
+ }
+ }
+
+
+ /********************* read ********************/
+
+ public int getNumAdded(){
+ return numArraysAdded;
+ }
+
+ // for debugging
+ public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
+ ArrayList<TokenizerNode> nodes = Lists.newArrayList();
+ root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
+ return nodes;
+ }
+
+ public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
+ boolean includeLeaves) {
+ root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
+ }
+
+ public List<byte[]> getArrays() {
+ List<TokenizerNode> nodes = new ArrayList<TokenizerNode>();
+ root.appendNodesToExternalList(nodes, true, true);
+ List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
+ for (int i = 0; i < nodes.size(); ++i) {
+ TokenizerNode node = nodes.get(i);
+ for (int j = 0; j < node.getNumOccurrences(); ++j) {
+ byte[] byteArray = node.getNewByteArray();
+ byteArrays.add(byteArray);
+ }
+ }
+ return byteArrays;
+ }
+
+ //currently unused, but working and possibly useful in the future
+ public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
+ int keyLength) {
+ root.getNode(resultHolder, key, keyOffset, keyLength);
+ }
+
+
+ /********************** write ***************************/
+
+ public Tokenizer setNodeFirstInsertionIndexes() {
+ root.setInsertionIndexes(0);
+ return this;
+ }
+
+ public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
+ root.appendOutputArrayOffsets(offsets);
+ return this;
+ }
+
+
+ /********************* print/debug ********************/
+
+ protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(getStructuralString());
+ if (INCLUDE_FULL_TREE_IN_TO_STRING) {
+ for (byte[] bytes : getArrays()) {
+ if (sb.length() > 0) {
+ sb.append("\n");
+ }
+ sb.append(Bytes.toString(bytes));
+ }
+ }
+ return sb.toString();
+ }
+
+ public String getStructuralString() {
+ List<TokenizerNode> nodes = getNodes(true, true);
+ StringBuilder sb = new StringBuilder();
+ for (TokenizerNode node : nodes) {
+ String line = node.getPaddedTokenAndOccurrenceString();
+ sb.append(line + "\n");
+ }
+ return sb.toString();
+ }
+
+
+ /****************** get/set ************************/
+
+ public TokenizerNode getRoot() {
+ return root;
+ }
+
+ public int getMaxElementLength() {
+ return maxElementLength;
+ }
+
+ public int getTreeDepth() {
+ return treeDepth;
+ }
+
+}
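Usage sketch (not part of the patch): feeding a few sorted row keys into a Tokenizer and reading the resulting trie back out. It assumes ByteRange offers a byte[] constructor as used elsewhere in this work; the row key values and class name are illustrative only.

    import java.util.List;

    import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
    import org.apache.hadoop.hbase.util.ByteRange;
    import org.apache.hadoop.hbase.util.Bytes;

    public class TokenizerSketch {
      public static void main(String[] args) {
        Tokenizer tokenizer = new Tokenizer();
        // inputs must arrive in sorted order
        tokenizer.addSorted(new ByteRange(Bytes.toBytes("rowA")));
        tokenizer.addSorted(new ByteRange(Bytes.toBytes("rowAA")));
        tokenizer.addSorted(new ByteRange(Bytes.toBytes("rowB")));

        // "rowA" becomes a nub: it is a complete input and also a prefix of "rowAA"
        System.out.println(tokenizer.getStructuralString());

        List<byte[]> roundTripped = tokenizer.getArrays();
        System.out.println(roundTripped.size());// 3, one byte[] per input occurrence
      }
    }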
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java
new file mode 100644
index 0000000..077b5f5
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java
@@ -0,0 +1,632 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+import org.apache.hadoop.hbase.util.Strings;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Individual node in a Trie structure. Each node is one of 3 types:
+ * <li>Branch: an internal trie node that may have a token and must have multiple children, but does
+ * not represent an actual input byte[], hence its numOccurrences is 0
+ * <li>Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the
+ * last bytes in the input byte[]s.
+ * <li>Nub: a combination of a branch and leaf. Its token represents the last bytes of input
+ * byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s
+ * that add bytes to this node's input byte[].
+ *
+ * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2
+ */
+@InterfaceAudience.Private
+public class TokenizerNode{
+
+ /*
+ * Ref to data structure wrapper
+ */
+ protected Tokenizer builder;
+
+ /******************************************************************
+ * Tree content/structure used during tokenization
+ * ****************************************************************/
+
+ /*
+ * ref to parent trie node
+ */
+ protected TokenizerNode parent;
+
+ /*
+ * node depth in trie, irrespective of each node's token length
+ */
+ protected int nodeDepth;
+
+ /*
+ * start index of this token in original byte[]
+ */
+ protected int tokenStartOffset;
+
+ /*
+ * bytes for this trie node. can be length 0 in root node
+ */
+ protected ByteRange token;
+
+ /*
+ * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for
+ * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing
+ * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode.
+ */
+ protected int numOccurrences;
+
+ /*
+ * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256
+ * child nodes.
+ */
+ protected ArrayList<TokenizerNode> children;
+
+
+ /*
+ * Fields used later in the encoding process for sorting the nodes into the order they'll be
+ * written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer
+ * are not generic data structures but instead are specific to HBase PrefixTree encoding.
+ */
+
+ /*
+ * unique id assigned to each TokenizerNode
+ */
+ protected long id;
+
+ /*
+ * set >=0 for nubs and leaves
+ */
+ protected int firstInsertionIndex = -1;
+
+ /*
+ * A positive value indicating how many bytes before the end of the block this node will start. If
+ * the section is 55 bytes and negativeIndex is 9, then the node will start at 46.
+ */
+ protected int negativeIndex = 0;
+
+ /*
+ * The offset in the output array at which to start writing this node's token bytes. Influenced
+ * by the lengths of all tokens sorted before this one.
+ */
+ protected int outputArrayOffset = -1;
+
+
+ /*********************** construct *****************************/
+
+ public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
+ int tokenStartOffset, int tokenOffset, int tokenLength) {
+ this.token = new ByteRange();
+ reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
+ this.children = Lists.newArrayList();
+ }
+
+ /*
+ * Sub-constructor for initializing all fields without allocating a new object. Used by the
+ * regular constructor.
+ */
+ public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
+ int tokenStartOffset, int tokenOffset, int tokenLength) {
+ this.builder = builder;
+ this.id = builder.nextNodeId();
+ this.parent = parent;
+ this.nodeDepth = nodeDepth;
+ builder.submitMaxNodeDepthCandidate(nodeDepth);
+ this.tokenStartOffset = tokenStartOffset;
+ this.token.set(builder.tokens, tokenOffset, tokenLength);
+ this.numOccurrences = 1;
+ }
+
+ /*
+ * Clear the state of this node so that it looks like it was just allocated.
+ */
+ public void reset() {
+ builder = null;
+ parent = null;
+ nodeDepth = 0;
+ tokenStartOffset = 0;
+ token.clear();
+ numOccurrences = 0;
+ children.clear();// branches & nubs
+
+ // ids/offsets. used during writing to byte[]
+ id = 0;
+ firstInsertionIndex = -1;// set >=0 for nubs and leaves
+ negativeIndex = 0;
+ outputArrayOffset = -1;
+ }
+
+
+ /************************* building *********************************/
+
+ /*
+ * <li>Only public method used during the tokenization process
+ * <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
+ * inputs
+ * <li>Only looks at bytes of the input array that align with this node's token
+ */
+ public void addSorted(final ByteRange bytes) {// recursively build the tree
+
+ /*
+ * Recurse deeper into the existing trie structure
+ */
+ if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
+ TokenizerNode lastChild = CollectionUtils.getLast(children);
+ if (lastChild.partiallyMatchesToken(bytes)) {
+ lastChild.addSorted(bytes);
+ return;
+ }
+ }
+
+ /*
+ * Recursion ended. We must either
+ * <li>1: increment numOccurrences if this input was equal to the previous
+ * <li>2: convert this node from a leaf to a nub, and add a new child leaf
+ * <li>3: split this node into a branch and leaf, and then add a second leaf
+ */
+
+ // add it as a child of this node
+ int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
+ int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
+ int tailLength = bytes.getLength() - tailOffset;
+
+ if (numIdenticalTokenBytes == token.getLength()) {
+ if (tailLength == 0) {// identical to this node (case 1)
+ incrementNumOccurrences(1);
+ } else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2)
+ int childNodeDepth = nodeDepth + 1;
+ int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
+ TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
+ bytes, tailOffset);
+ addChild(newChildNode);
+ }
+ } else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3)
+ split(numIdenticalTokenBytes, bytes);
+ }
+ }
+
+
+ protected void addChild(TokenizerNode node) {
+ node.setParent(this);
+ children.add(node);
+ }
+
+
+ /**
+ * Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the
+ * method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output
+ * will be 3 nodes:
+ * <li>1: B <- branch
+ * <li>2: AA <- leaf
+ * <li>3: OO <- leaf
+ *
+ * @param numTokenBytesToRetain => 1 (the B)
+ * @param bytes => BOO
+ */
+ protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
+ int childNodeDepth = nodeDepth;
+ int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;
+
+ //create leaf AA
+ TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
+ token, numTokenBytesToRetain);
+ firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
+ token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B
+ numOccurrences = 0;//current node is now a branch
+
+ moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B)
+ addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children
+
+ //create leaf OO
+ TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
+ bytes, tokenStartOffset + numTokenBytesToRetain);
+ addChild(secondChild);//add the new leaf (OO) to the branch's (B's) children
+
+ // we inserted branch node B as a new level above/before the two children, so increment the
+ // depths of the children below
+ firstChild.incrementNodeDepthRecursively();
+ secondChild.incrementNodeDepthRecursively();
+ }
+
+
+ protected void incrementNodeDepthRecursively() {
+ ++nodeDepth;
+ builder.submitMaxNodeDepthCandidate(nodeDepth);
+ for (int i = 0; i < children.size(); ++i) {
+ children.get(i).incrementNodeDepthRecursively();
+ }
+ }
+
+
+ protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ child.setParent(newParent);
+ newParent.children.add(child);
+ }
+ children.clear();
+ }
+
+
+ /************************ byte[] utils *************************/
+
+ protected boolean partiallyMatchesToken(ByteRange bytes) {
+ return numIdenticalBytes(bytes) > 0;
+ }
+
+ protected boolean matchesToken(ByteRange bytes) {
+ return numIdenticalBytes(bytes) == getTokenLength();
+ }
+
+ protected int numIdenticalBytes(ByteRange bytes) {
+ return token.numEqualPrefixBytes(bytes, tokenStartOffset);
+ }
+
+
+ /***************** moving nodes around ************************/
+
+ public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
+ boolean includeLeaves) {
+ if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
+ appendTo.add(this);
+ }
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
+ }
+ }
+
+ public int setInsertionIndexes(int nextIndex) {
+ int newNextIndex = nextIndex;
+ if (hasOccurrences()) {
+ setFirstInsertionIndex(nextIndex);
+ newNextIndex += numOccurrences;
+ }
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ newNextIndex = child.setInsertionIndexes(newNextIndex);
+ }
+ return newNextIndex;
+ }
+
+ public void appendOutputArrayOffsets(List<Integer> offsets) {
+ if (hasOccurrences()) {
+ offsets.add(outputArrayOffset);
+ }
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ child.appendOutputArrayOffsets(offsets);
+ }
+ }
+
+
+ /***************** searching *********************************/
+
+ /*
+ * Do a trie style search through the tokenizer. One option for looking up families or qualifiers
+ * during encoding, but currently unused in favor of tracking this information as they are added.
+ *
+ * Keeping code pending further performance testing.
+ */
+ public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
+ int keyLength) {
+ int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();
+
+ // quick check if the key is shorter than this node (may not work for binary search)
+ if (CollectionUtils.isEmpty(children)) {
+ if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes
+ resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
+ return;
+ }
+ }
+
+ // all token bytes must match
+ for (int i = 0; i < token.getLength(); ++i) {
+ if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
+ // TODO return whether it's before or after so we can binary search
+ resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
+ return;
+ }
+ }
+
+ if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
+ resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH
+ return;
+ }
+
+ if (CollectionUtils.notEmpty(children)) {
+ // TODO binary search the children
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ child.getNode(resultHolder, key, keyOffset, keyLength);
+ if (resultHolder.isMatch()) {
+ return;
+ } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
+ // passed it, so it doesn't exist
+ resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
+ return;
+ }
+ // key is still AFTER the current node, so continue searching
+ }
+ }
+
+ // checked all children (or there were no children), and didn't find it
+ resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
+ return;
+ }
+
+
+ /****************** writing back to byte[]'s *************************/
+
+ public byte[] getNewByteArray() {
+ byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
+ fillInBytes(arrayToFill);
+ return arrayToFill;
+ }
+
+ public void fillInBytes(byte[] arrayToFill) {
+ for (int i = 0; i < token.getLength(); ++i) {
+ arrayToFill[tokenStartOffset + i] = token.get(i);
+ }
+ if (parent != null) {
+ parent.fillInBytes(arrayToFill);
+ }
+ }
+
+
+ /************************** printing ***********************/
+
+ @Override
+ public String toString() {
+ String s = "";
+ if (parent == null) {
+ s += "R ";
+ } else {
+ s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
+ }
+ s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
+ if (numOccurrences > 0) {
+ s += "x" + numOccurrences;
+ }
+ return s;
+ }
+
+ public String getPaddedTokenAndOccurrenceString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(getBnlIndicator(true));
+ sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
+ sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
+ if (outputArrayOffset >= 0) {
+ sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
+ }
+ sb.append(" ");
+ for (int i = 0; i < tokenStartOffset; ++i) {
+ sb.append(" ");
+ }
+ sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
+ return sb.toString();
+ }
+
+ public String getBnlIndicator(boolean indent) {
+ if (indent) {
+ if (isNub()) {
+ return " N ";
+ }
+ return isBranch() ? "B " : " L";
+ }
+ if (isNub()) {
+ return "N";
+ }
+ return isBranch() ? "B" : "L";
+ }
+
+
+ /********************** count different node types ********************/
+
+ public int getNumBranchNodesIncludingThisNode() {
+ if (isLeaf()) {
+ return 0;
+ }
+ int totalFromThisPlusChildren = isBranch() ? 1 : 0;
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
+ }
+ return totalFromThisPlusChildren;
+ }
+
+ public int getNumNubNodesIncludingThisNode() {
+ if (isLeaf()) {
+ return 0;
+ }
+ int totalFromThisPlusChildren = isNub() ? 1 : 0;
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
+ }
+ return totalFromThisPlusChildren;
+ }
+
+ public int getNumLeafNodesIncludingThisNode() {
+ if (isLeaf()) {
+ return 1;
+ }
+ int totalFromChildren = 0;
+ for (int i = 0; i < children.size(); ++i) {
+ TokenizerNode child = children.get(i);
+ totalFromChildren += child.getNumLeafNodesIncludingThisNode();
+ }
+ return totalFromChildren;
+ }
+
+
+ /*********************** simple read-only methods *******************************/
+
+ public int getNodeDepth() {
+ return nodeDepth;
+ }
+
+ public int getTokenLength() {
+ return token.getLength();
+ }
+
+ public boolean hasOccurrences() {
+ return numOccurrences > 0;
+ }
+
+ public boolean isRoot() {
+ return this.parent == null;
+ }
+
+ public int getNumChildren() {
+ return CollectionUtils.nullSafeSize(children);
+ }
+
+ public TokenizerNode getLastChild() {
+ if (CollectionUtils.isEmpty(children)) {
+ return null;
+ }
+ return CollectionUtils.getLast(children);
+ }
+
+ public boolean isLeaf() {
+ return CollectionUtils.isEmpty(children) && hasOccurrences();
+ }
+
+ public boolean isBranch() {
+ return CollectionUtils.notEmpty(children) && !hasOccurrences();
+ }
+
+ public boolean isNub() {
+ return CollectionUtils.notEmpty(children) && hasOccurrences();
+ }
+
+
+ /********************** simple mutation methods *************************/
+
+ /**
+ * Each occurrence > 1 indicates a repeat of the previous entry. This can be called directly by
+ * an external class without going through the process of detecting a repeat if it is a known
+ * repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows
+ * the new cells are part of the current row.
+ * @param d increment by this amount
+ */
+ public void incrementNumOccurrences(int d) {
+ numOccurrences += d;
+ }
+
+
+ /************************* autogenerated get/set ******************/
+
+ public int getTokenOffset() {
+ return tokenStartOffset;
+ }
+
+ public TokenizerNode getParent() {
+ return parent;
+ }
+
+ public ByteRange getToken() {
+ return token;
+ }
+
+ public int getNumOccurrences() {
+ return numOccurrences;
+ }
+
+ public void setParent(TokenizerNode parent) {
+ this.parent = parent;
+ }
+
+ public void setNumOccurrences(int numOccurrences) {
+ this.numOccurrences = numOccurrences;
+ }
+
+ public ArrayList<TokenizerNode> getChildren() {
+ return children;
+ }
+
+ public long getId() {
+ return id;
+ }
+
+ public int getFirstInsertionIndex() {
+ return firstInsertionIndex;
+ }
+
+ public void setFirstInsertionIndex(int firstInsertionIndex) {
+ this.firstInsertionIndex = firstInsertionIndex;
+ }
+
+ public int getNegativeIndex() {
+ return negativeIndex;
+ }
+
+ public void setNegativeIndex(int negativeIndex) {
+ this.negativeIndex = negativeIndex;
+ }
+
+ public int getOutputArrayOffset() {
+ return outputArrayOffset;
+ }
+
+ public void setOutputArrayOffset(int outputArrayOffset) {
+ this.outputArrayOffset = outputArrayOffset;
+ }
+
+ public void setId(long id) {
+ this.id = id;
+ }
+
+ public void setBuilder(Tokenizer builder) {
+ this.builder = builder;
+ }
+
+ public void setTokenOffset(int tokenOffset) {
+ this.tokenStartOffset = tokenOffset;
+ }
+
+ public void setToken(ByteRange token) {
+ this.token = token;
+ }
+
+}
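To make the split(..) walkthrough above concrete, a sketch (not part of the patch) that reproduces the BAA/BOO case end to end, again assuming a ByteRange byte[] constructor and an illustrative class name:

    import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
    import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
    import org.apache.hadoop.hbase.util.ByteRange;
    import org.apache.hadoop.hbase.util.Bytes;

    public class SplitSketch {
      public static void main(String[] args) {
        Tokenizer tokenizer = new Tokenizer();
        tokenizer.addSorted(new ByteRange(Bytes.toBytes("BAA")));
        tokenizer.addSorted(new ByteRange(Bytes.toBytes("BOO")));// forces the split described above

        TokenizerNode root = tokenizer.getRoot();
        System.out.println(root.isBranch());      // true: token "B", numOccurrences == 0
        System.out.println(root.getNumChildren());// 2: leaves "AA" and "OO"
      }
    }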
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java
new file mode 100644
index 0000000..37909d9
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+
+/**
+ * Warning: currently unused, but code is valid. Pending performance testing on more data sets.
+ *
+ * Indicates where the key is relative to our current position in the tree. For example, the
+ * current tree node is "BEFORE" the key we are seeking.
+ */
+@InterfaceAudience.Private
+public enum TokenizerRowSearchPosition {
+
+ AFTER,//the key is after this tree node, so keep searching
+ BEFORE,//in a binary search, this tells us to back up
+ MATCH,//the current node is a full match
+ NO_MATCH,//might as well return a value more informative than null
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java
new file mode 100644
index 0000000..2d22c50
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+
+/**
+ * for recursively searching a PtBuilder
+ */
+@InterfaceAudience.Private
+public class TokenizerRowSearchResult{
+
+ /************ fields ************************/
+
+ protected TokenizerRowSearchPosition difference;
+ protected TokenizerNode matchingNode;
+
+
+ /*************** construct *****************/
+
+ public TokenizerRowSearchResult() {
+ }
+
+ public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) {
+ this.difference = difference;
+ }
+
+ public TokenizerRowSearchResult(TokenizerNode matchingNode) {
+ this.difference = TokenizerRowSearchPosition.MATCH;
+ this.matchingNode = matchingNode;
+ }
+
+
+ /*************** methods **********************/
+
+ public boolean isMatch() {
+ return TokenizerRowSearchPosition.MATCH == difference;
+ }
+
+
+ /************* get/set ***************************/
+
+ public TokenizerRowSearchPosition getDifference() {
+ return difference;
+ }
+
+ public TokenizerNode getMatchingNode() {
+ return matchingNode;
+ }
+
+ public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) {
+ this.difference = difference;
+ this.matchingNode = matchingNode;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellScannerPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellScannerPosition.java
new file mode 100644
index 0000000..f820258
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellScannerPosition.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.scanner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * An indicator of the state of the scanner after an operation such as nextCell() or
+ * positionAt(..). For example:
+ * <ul>
+ * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
+ * it should load the next block.</li>
+ * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.
+ * </li>
+ * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
+ * next region.</li>
+ * </ul>
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public enum CellScannerPosition {
+
+ /**
+ * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first
+ * cell.
+ */
+ BEFORE_FIRST,
+
+ /**
+ * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
+ * rather it is the nearest cell before the requested cell.
+ */
+ BEFORE,
+
+ /**
+ * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by
+ * positionAt(..).
+ */
+ AT,
+
+ /**
+ * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
+ * rather it is the nearest cell after the requested cell.
+ */
+ AFTER,
+
+ /**
+ * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect.
+ */
+ AFTER_LAST
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellSearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellSearcher.java
new file mode 100644
index 0000000..b0f1e27
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellSearcher.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.scanner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.Cell;
+
+/**
+ * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that
+ * the implementation is able to navigate between cells without iterating through every cell.
+ */
+@InterfaceAudience.Private
+public interface CellSearcher extends ReversibleCellScanner {
+ /**
+ * Reset any state in the scanner so it appears it was freshly opened.
+ */
+ void resetToBeforeFirstEntry();
+
+ /**
+ * Do everything within this scanner's power to find the key. Look forward and backwards.
+ *
+ * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state.
+ *
+ * @param key position the CellScanner exactly on this key
+ * @return true if the cell existed and getCurrentCell() holds a valid cell
+ */
+ boolean positionAt(Cell key);
+
+ /**
+ * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no
+ * exact match.
+ *
+ * @param key position the CellScanner on this key or the closest cell before
+ * @return AT if exact match
+ * BEFORE if on last cell before key
+ * BEFORE_FIRST if key was before the first cell in this scanner's scope
+ */
+ CellScannerPosition positionAtOrBefore(Cell key);
+
+ /**
+ * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact
+ * match.
+ *
+ * @param key position the CellScanner on this key or the closest cell after
+ * @return AT if exact match
+ * AFTER if on first cell after key
+ * AFTER_LAST if key was after the last cell in this scanner's scope
+ */
+ CellScannerPosition positionAtOrAfter(Cell key);
+
+ /**
+ * Note: Added for backwards compatibility with
+ * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek}
+ *
+ * Look for the key, but only look after the current position. Probably not needed for an
+ * efficient tree implementation, but is important for implementations without random access such
+ * as unencoded KeyValue blocks.
+ *
+ * @param key position the CellScanner exactly on this key
+ * @return true if getCurrent() holds a valid cell
+ */
+ boolean seekForwardTo(Cell key);
+
+ /**
+ * Same as seekForwardTo(..), but go to the extra effort of finding the previous key if there's no
+ * exact match.
+ *
+ * @param key position the CellScanner on this key or the closest cell before
+ * @return AT if exact match
+ * BEFORE if on last cell before key
+ * BEFORE_FIRST if key was before the first cell in this scanner's scope
+ */
+ CellScannerPosition seekForwardToOrBefore(Cell key);
+
+ /**
+ * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
+ * exact match.
+ *
+ * @param key
+ * @return AT if exact match
+ * AFTER if on first cell after key
+ * AFTER_LAST if key was after the last cell in this scanner's scope
+ */
+ CellScannerPosition seekForwardToOrAfter(Cell key);
+
+ /**
+ * Note: This may not be appropriate to have in the interface. Need to investigate.
+ *
+ * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST.
+ * This is used by tests and for handling certain edge cases.
+ */
+ void positionAfterLastCell();
+
+}
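A hedged sketch (not part of the patch) of how a caller might interpret the CellScannerPosition returned by positionAtOrAfter(..); the class and helper names are illustrative:

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
    import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;

    public class SeekSketch {
      // Seek to the first cell >= key and report whether it was an exact hit.
      static boolean seekAtOrAfter(CellSearcher searcher, Cell key) {
        CellScannerPosition position = searcher.positionAtOrAfter(key);
        if (position == CellScannerPosition.AFTER_LAST) {
          return false;// key is past the end of this scanner's scope
        }
        return position == CellScannerPosition.AT;// AT: exact match; AFTER: nearest next cell
      }
    }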
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java
new file mode 100644
index 0000000..bb83ac5
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.prefixtree.scanner;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.CellScanner;
+
+/**
+ * An extension of CellScanner indicating the scanner supports iterating backwards through cells.
+ *
+ * Note: This was not added to suggest that HBase should support client facing reverse Scanners,
+ * but
+ * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing
+ * up if the positionAt(..) method goes past the requested cell.
+ */
+@InterfaceAudience.Private
+public interface ReversibleCellScanner extends CellScanner {
+
+ /**
+ * Try to position the scanner one Cell before the current position.
+ * @return true if the operation was successful, meaning getCurrentCell() will return a valid
+ * Cell.
+ * false if there were no previous cells, meaning getCurrentCell() will return null.
+ * Scanner position will be
+ * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
+ */
+ boolean previous();
+
+ /**
+ * Try to position the scanner in the row before the current row.
+ * @param endOfRow true for the last cell in the previous row; false for the first cell
+ * @return true if the operation was successful, meaning getCurrentCell() will return a valid
+ * Cell.
+ * false if there were no previous cells, meaning getCurrentCell() will return null.
+ * Scanner position will be
+ * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
+ */
+ boolean previousRow(boolean endOfRow);
+}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/ByteRangeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/ByteRangeSet.java
new file mode 100644
index 0000000..570d489
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/ByteRangeSet.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util.byterange;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.ArrayUtils;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
+ * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
+ *
+ * Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and
+ * {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
+ * trie-oriented ByteRangeTrieSet, etc
+ */
+@InterfaceAudience.Private
+public abstract class ByteRangeSet {
+
+ /******************** fields **********************/
+
+ protected byte[] byteAppender;
+ protected int numBytes;
+
+ protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;
+
+ protected ArrayList<ByteRange> uniqueRanges;
+ protected int numUniqueRanges = 0;
+
+ protected int[] uniqueRangeIndexByInsertionId;
+ protected int numInputs;
+
+ protected List<Integer> sortedIndexByUniqueIndex;
+ protected int[] sortedIndexByInsertionId;
+ protected ArrayList<ByteRange> sortedRanges;
+
+
+ /****************** construct **********************/
+
+ protected ByteRangeSet() {
+ this.byteAppender = new byte[0];
+ this.uniqueRanges = Lists.newArrayList();
+ this.uniqueRangeIndexByInsertionId = new int[0];
+ this.sortedIndexByUniqueIndex = Lists.newArrayList();
+ this.sortedIndexByInsertionId = new int[0];
+ this.sortedRanges = Lists.newArrayList();
+ }
+
+ public void reset() {
+ numBytes = 0;
+ uniqueIndexByUniqueRange.clear();
+ numUniqueRanges = 0;
+ numInputs = 0;
+ sortedIndexByUniqueIndex.clear();
+ sortedRanges.clear();
+ }
+
+
+ /*************** abstract *************************/
+
+ public abstract void addToSortedRanges();
+
+
+ /**************** methods *************************/
+
+ /**
+ * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
+ * insert it into the tracking Map uniqueIndexByUniqueRange.
+ */
+ public void add(ByteRange bytes) {
+ Integer index = uniqueIndexByUniqueRange.get(bytes);
+ if (index == null) {
+ index = store(bytes);
+ }
+ int minLength = numInputs + 1;
+ uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
+ minLength, 2 * minLength);
+ uniqueRangeIndexByInsertionId[numInputs] = index;
+ ++numInputs;
+ }
+
+ protected int store(ByteRange bytes) {
+ int indexOfNewElement = numUniqueRanges;
+ if (uniqueRanges.size() <= numUniqueRanges) {
+ uniqueRanges.add(new ByteRange());
+ }
+ ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
+ int neededBytes = numBytes + bytes.getLength();
+ byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
+ bytes.deepCopyTo(byteAppender, numBytes);
+ storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
+ numBytes += bytes.getLength();
+ uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
+ int newestUniqueIndex = numUniqueRanges;
+ ++numUniqueRanges;
+ return newestUniqueIndex;
+ }
+
+ public ByteRangeSet compile() {
+ addToSortedRanges();
+ for (int i = 0; i < sortedRanges.size(); ++i) {
+ sortedIndexByUniqueIndex.add(null);// need to grow the size
+ }
+ // TODO move this to an invert(int[]) util method
+ for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
+ int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
+ sortedIndexByUniqueIndex.set(uniqueIndex, i);
+ }
+ sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
+ numInputs);
+ for (int i = 0; i < numInputs; ++i) {
+ int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
+ int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
+ sortedIndexByInsertionId[i] = sortedIndex;
+ }
+ return this;
+ }
+
+ public int getSortedIndexForInsertionId(int insertionId) {
+ return sortedIndexByInsertionId[insertionId];
+ }
+
+ public int size() {
+ return uniqueIndexByUniqueRange.size();
+ }
+
+
+ /***************** standard methods ************************/
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ int i = 0;
+ for (ByteRange r : sortedRanges) {
+ if (i > 0) {
+ sb.append("\n");
+ }
+ sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
+ ++i;
+ }
+ sb.append("\ntotalSize:" + numBytes);
+ sb.append("\navgSize:" + getAvgSize());
+ return sb.toString();
+ }
+
+
+ /**************** get/set *****************************/
+
+ public ArrayList<ByteRange> getSortedRanges() {
+ return sortedRanges;
+ }
+
+ public long getAvgSize() {
+ return numBytes / numUniqueRanges;
+ }
+
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeHashSet.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeHashSet.java
new file mode 100644
index 0000000..ef14a68
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeHashSet.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util.byterange.impl;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+import org.apache.hadoop.hbase.util.IterableUtils;
+import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
+
+/**
+ * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces
+ * garbage when adding a new element to it. We can probably create a tighter implementation without
+ * pointers or garbage.
+ */
+@InterfaceAudience.Private
+public class ByteRangeHashSet extends ByteRangeSet {
+
+ /************************ constructors *****************************/
+
+ public ByteRangeHashSet() {
+ this.uniqueIndexByUniqueRange = new HashMap<ByteRange, Integer>();
+ }
+
+ public ByteRangeHashSet(List<ByteRange> rawByteArrays) {
+ this();// needed to initialize the HashMap before add(..) is called
+ for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
+ add(in);
+ }
+ }
+
+ @Override
+ public void addToSortedRanges() {
+ sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
+ Collections.sort(sortedRanges);
+ }
+
+}
\ No newline at end of file
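Usage sketch (not part of the patch) for the ByteRangeSet/ByteRangeHashSet pair: de-dupe unsorted qualifiers, compile the sorted view, then map each insertion back to its sorted index. It assumes the ByteRange byte[] constructor; the qualifier values and class name are illustrative.

    import org.apache.hadoop.hbase.util.ByteRange;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
    import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;

    public class ByteRangeSetSketch {
      public static void main(String[] args) {
        ByteRangeSet qualifiers = new ByteRangeHashSet();
        qualifiers.add(new ByteRange(Bytes.toBytes("qualB")));
        qualifiers.add(new ByteRange(Bytes.toBytes("qualA")));
        qualifiers.add(new ByteRange(Bytes.toBytes("qualB")));// duplicate, de-duped

        qualifiers.compile();// sorts the unique ranges and builds the index mappings
        System.out.println(qualifiers.size());// 2 unique ranges
        // insertion id 0 was "qualB", which lands at sorted index 1 (after "qualA")
        System.out.println(qualifiers.getSortedIndexForInsertionId(0));// 1
      }
    }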
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeTreeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeTreeSet.java
new file mode 100644
index 0000000..eb86bc9
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeTreeSet.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util.byterange.impl;
+
+import java.util.List;
+import java.util.TreeMap;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.ByteRange;
+import org.apache.hadoop.hbase.util.CollectionUtils;
+import org.apache.hadoop.hbase.util.IterableUtils;
+import org.apache.hadoop.hbase.util.byterange.ByteRangeSet;
+
+/**
+ * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet.
+ */
+@InterfaceAudience.Private
+public class ByteRangeTreeSet extends ByteRangeSet {
+
+ /************************ constructors *****************************/
+
+ public ByteRangeTreeSet() {
+ this.uniqueIndexByUniqueRange = new TreeMap<ByteRange, Integer>();
+ }
+
+ public ByteRangeTreeSet(List<ByteRange> rawByteArrays) {
+ this();//needed to initialize the TreeMap
+ for(ByteRange in : IterableUtils.nullSafe(rawByteArrays)){
+ add(in);
+ }
+ }
+
+ @Override
+ public void addToSortedRanges() {
+ sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
+ }
+
+}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UFIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UFIntTool.java
new file mode 100644
index 0000000..bec5deb
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UFIntTool.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util.vint;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * UFInt is an abbreviation for Unsigned Fixed-width Integer.
+ *
+ * This class converts between positive ints and 1-4 bytes that represent the int. All input ints
+ * must be positive. The number of values representable in N bytes is:
+ *
+ * N=1: 2^8 => 256
+ * N=2: 2^16 => 65,536
+ * N=3: 2^24 => 16,777,216
+ * N=4: capped at Integer.MAX_VALUE (2^31 - 1), since inputs are positive Java ints
+ *
+ * This was created to get most of the memory savings of a variable length integer when encoding
+ * an array of input integers, but to fix the number of bytes for each integer to the number needed
+ * to store the maximum integer in the array. This enables a binary search to be performed on the
+ * array of encoded integers.
+ *
+ * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
+ * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
+ * numbers will also require 2 bytes.
+ *
+ * warnings:
+ * * no input validation for max performance
+ * * no negatives
+ */
+@InterfaceAudience.Private
+public class UFIntTool {
+
+ private static final int NUM_BITS_IN_LONG = 64;
+
+ public static long maxValueForNumBytes(int numBytes) {
+ return (1L << (numBytes * 8)) - 1;
+ }
+
+ public static int numBytes(final long value) {
+ if (value == 0) {// 0 doesn't work with the formula below
+ return 1;
+ }
+ return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
+ }
+
+ public static byte[] getBytes(int outputWidth, final long value) {
+ byte[] bytes = new byte[outputWidth];
+ writeBytes(outputWidth, value, bytes, 0);
+ return bytes;
+ }
+
+ public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
+ bytes[offset + outputWidth - 1] = (byte) value;
+ for (int i = outputWidth - 2; i >= 0; --i) {
+ bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8);
+ }
+ }
+
+ private static final long[] MASKS = new long[] {
+ (long) 255,
+ (long) 255 << 8,
+ (long) 255 << 16,
+ (long) 255 << 24,
+ (long) 255 << 32,
+ (long) 255 << 40,
+ (long) 255 << 48,
+ (long) 255 << 56
+ };
+
+ public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException {
+ for (int i = outputWidth - 1; i >= 0; --i) {
+ os.write((byte) ((value & MASKS[i]) >>> (8 * i)));
+ }
+ }
+
+ public static long fromBytes(final byte[] bytes) {
+ long value = 0;
+ value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int
+ for (int i = 1; i < bytes.length; ++i) {
+ value <<= 8;
+ value |= bytes[i] & 0xff;
+ }
+ return value;
+ }
+
+ public static long fromBytes(final byte[] bytes, final int offset, final int width) {
+ long value = 0;
+ value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int
+ for (int i = 1; i < width; ++i) {
+ value <<= 8;
+ value |= bytes[i + offset] & 0xff;
+ }
+ return value;
+ }
+
+}
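Round-trip sketch (not part of the patch): pick the fixed width from the largest value to be stored, then encode and decode with that width. The class name and sample value are illustrative.

    import org.apache.hadoop.hbase.util.vint.UFIntTool;

    public class UFIntSketch {
      public static void main(String[] args) {
        long maxValueInArray = 70000;                       // e.g. largest offset in the block
        int width = UFIntTool.numBytes(maxValueInArray);    // 3 bytes for 70,000
        byte[] encoded = UFIntTool.getBytes(width, 70000);  // {0x01, 0x11, 0x70}, big-endian
        long decoded = UFIntTool.fromBytes(encoded);
        System.out.println(width + " " + decoded);          // 3 70000
      }
    }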
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVIntTool.java
new file mode 100644
index 0000000..ac22684
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVIntTool.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util.vint;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Simple Variable Length Integer encoding. A leftmost bit of 0 means we are on the last byte; if
+ * the leftmost bit of the current byte is 1, then there is at least one more byte.
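+ *
+ * Illustrative example (added for clarity; the byte values are worked out by hand):
+ * <pre>
+ * UVIntTool.getBytes(500); // {(byte) 0xF4, 0x03}: low 7 bits first, high bit means "more"
+ * UVIntTool.getInt(new byte[] { (byte) 0xF4, 0x03 }); // 500
+ * </pre>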
+ */
+@InterfaceAudience.Private
+public class UVIntTool {
+
+ public static final byte
+ BYTE_7_RIGHT_BITS_SET = 127,
+ BYTE_LEFT_BIT_SET = -128;
+
+ public static final long
+ INT_7_RIGHT_BITS_SET = 127,
+ INT_8TH_BIT_SET = 128;
+
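+ // Integer.MAX_VALUE (2^31 - 1) encoded as a UVInt: four continuation bytes (0xFF) then 0x07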
+ public static final byte[]
+ MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 };
+
+ /********************* int -> bytes **************************/
+
+ public static int numBytes(int in) {
+ if (in == 0) {
+ // doesn't work with the formula below
+ return 1;
+ }
+ return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1)
+ }
+
+ public static byte[] getBytes(int value) {
+ int numBytes = numBytes(value);
+ byte[] bytes = new byte[numBytes];
+ int remainder = value;
+ for (int i = 0; i < numBytes - 1; ++i) {
+ // set the left bit
+ bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET);
+ remainder >>= 7;
+ }
+ // do not set the left bit
+ bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET);
+ return bytes;
+ }
+
+ public static int writeBytes(int value, OutputStream os) throws IOException {
+ int numBytes = numBytes(value);
+ int remainder = value;
+ for (int i = 0; i < numBytes - 1; ++i) {
+ // set the left bit
+ os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET));
+ remainder >>= 7;
+ }
+ // do not set the left bit
+ os.write((byte) (remainder & INT_7_RIGHT_BITS_SET));
+ return numBytes;
+ }
+
+ /******************** bytes -> int **************************/
+
+ public static int getInt(byte[] bytes) {
+ return getInt(bytes, 0);
+ }
+
+ public static int getInt(byte[] bytes, int offset) {
+ int value = 0;
+ for (int i = 0;; ++i) {
+ byte b = bytes[offset + i];
+ int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
+ shifted <<= 7 * i;
+ value |= shifted;
+ if (b >= 0) {
+ break;
+ }
+ }
+ return value;
+ }
+
+ public static int getInt(InputStream is) throws IOException {
+ int value = 0;
+ int i = 0;
+ int b;
+ do {
+ b = is.read();
+ int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
+ shifted <<= 7 * i;
+ value |= shifted;
+ ++i;
+ } while (b > Byte.MAX_VALUE);// a set leftmost bit (b >= 128) means another byte follows
+ return value;
+ }
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVLongTool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVLongTool.java
new file mode 100644
index 0000000..57dba17
--- /dev/null
+++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVLongTool.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.util.vint;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Simple Variable Length encoding for positive longs. A leftmost bit of 0 means we are on the
+ * last byte; if the leftmost bit of the current byte is 1, then there is at least one more byte.
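+ *
+ * Illustrative example (added for clarity; the byte values are worked out by hand):
+ * <pre>
+ * UVLongTool.getBytes(300L); // {(byte) 0xAC, 0x02}: low 7 bits first, high bit means "more"
+ * UVLongTool.getLong(new byte[] { (byte) 0xAC, 0x02 }); // 300
+ * </pre>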
+ */
+@InterfaceAudience.Private
+public class UVLongTool {
+
+ public static final byte
+ BYTE_7_RIGHT_BITS_SET = 127,
+ BYTE_LEFT_BIT_SET = -128;
+
+ public static final long
+ LONG_7_RIGHT_BITS_SET = 127,
+ LONG_8TH_BIT_SET = 128;
+
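+ // Long.MAX_VALUE (2^63 - 1) encoded as a UVLong: eight continuation bytes (0xFF) then 0x7F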
+ public static final byte[]
+ MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 };
+
+
+ /********************* long -> bytes **************************/
+
+ // would need an illegal-argument check if callers could pass negative values
+ public static int numBytes(long in) {
+ if (in == 0) {// zero doesn't work with the formula below
+ return 1;
+ }
+ return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1)
+ }
+
+ public static byte[] getBytes(long value) {
+ int numBytes = numBytes(value);
+ byte[] bytes = new byte[numBytes];
+ long remainder = value;
+ for (int i = 0; i < numBytes - 1; ++i) {
+ bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit
+ remainder >>= 7;
+ }
+ bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit
+ return bytes;
+ }
+
+ public static int writeBytes(long value, OutputStream os) throws IOException {
+ int numBytes = numBytes(value);
+ long remainder = value;
+ for (int i = 0; i < numBytes - 1; ++i) {
+ // set the left bit
+ os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET));
+ remainder >>= 7;
+ }
+ // do not set the left bit
+ os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET));
+ return numBytes;
+ }
+
+ /******************** bytes -> long **************************/
+
+ public static long getLong(byte[] bytes) {
+ return getLong(bytes, 0);
+ }
+
+ public static long getLong(byte[] bytes, int offset) {
+ long value = 0;
+ for (int i = 0;; ++i) {
+ byte b = bytes[offset + i];
+ long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
+ shifted <<= 7 * i;
+ value |= shifted;
+ if (b >= 0) {// first bit was 0, so that's the last byte in the VarLong
+ break;
+ }
+ }
+ return value;
+ }
+
+ public static long getLong(InputStream is) throws IOException {
+ long value = 0;
+ int i = 0;
+ int b;
+ do {
+ b = is.read();
+ long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
+ shifted <<= 7 * i;
+ value |= shifted;
+ ++i;
+ } while (b > Byte.MAX_VALUE);
+ return value;
+ }
+}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java
deleted file mode 100644
index a696121..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java
+++ /dev/null
@@ -1,841 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
-import org.apache.hbase.util.vint.UVIntTool;
-import org.apache.hbase.util.vint.UVLongTool;
-
-/**
- * Information about the block. Stored at the beginning of the byte[]. Contains things
- * like minimum timestamp and width of FInts in the row tree.
- *
- * Most fields stored in VInts that get decoded on the first access of each new block.
- */
-@InterfaceAudience.Private
-public class PrefixTreeBlockMeta {
-
- /******************* static fields ********************/
-
- public static final int VERSION = 0;
-
- public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue
-
- public static final int
- NUM_LONGS = 2,
- NUM_INTS = 22,
- NUM_SHORTS = 0,//keyValueTypeWidth not persisted
- NUM_SINGLE_BYTES = 2,
- MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS
- + Bytes.SIZEOF_SHORT * NUM_SHORTS
- + Bytes.SIZEOF_INT * NUM_INTS
- + NUM_SINGLE_BYTES;
-
-
- /**************** transient fields *********************/
-
- protected int arrayOffset;
- protected int bufferOffset;
-
-
- /**************** persisted fields **********************/
-
- // PrefixTree version to allow future format modifications
- protected int version;
- protected int numMetaBytes;
- protected int numKeyValueBytes;
- protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte
-
- // split the byte[] into 6 sections for the different data types
- protected int numRowBytes;
- protected int numFamilyBytes;
- protected int numQualifierBytes;
- protected int numTimestampBytes;
- protected int numMvccVersionBytes;
- protected int numValueBytes;
-
- // number of bytes in each section of fixed width FInts
- protected int nextNodeOffsetWidth;
- protected int familyOffsetWidth;
- protected int qualifierOffsetWidth;
- protected int timestampIndexWidth;
- protected int mvccVersionIndexWidth;
- protected int valueOffsetWidth;
- protected int valueLengthWidth;
-
- // used to pre-allocate structures for reading
- protected int rowTreeDepth;
- protected int maxRowLength;
- protected int maxQualifierLength;
-
- // the timestamp from which the deltas are calculated
- protected long minTimestamp;
- protected int timestampDeltaWidth;
- protected long minMvccVersion;
- protected int mvccVersionDeltaWidth;
-
- protected boolean allSameType;
- protected byte allTypes;
-
- protected int numUniqueRows;
- protected int numUniqueFamilies;
- protected int numUniqueQualifiers;
-
-
- /***************** constructors ********************/
-
- public PrefixTreeBlockMeta() {
- }
-
- public PrefixTreeBlockMeta(InputStream is) throws IOException{
- this.version = VERSION;
- this.arrayOffset = 0;
- this.bufferOffset = 0;
- readVariableBytesFromInputStream(is);
- }
-
- /**
- * @param buffer positioned at start of PtBlockMeta
- */
- public PrefixTreeBlockMeta(ByteBuffer buffer) {
- initOnBlock(buffer);
- }
-
- public void initOnBlock(ByteBuffer buffer) {
- arrayOffset = buffer.arrayOffset();
- bufferOffset = buffer.position();
- readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset);
- }
-
-
- /**************** operate on each field **********************/
-
- public int calculateNumMetaBytes(){
- int numBytes = 0;
- numBytes += UVIntTool.numBytes(version);
- numBytes += UVLongTool.numBytes(numMetaBytes);
- numBytes += UVIntTool.numBytes(numKeyValueBytes);
- ++numBytes;//os.write(getIncludesMvccVersion());
-
- numBytes += UVIntTool.numBytes(numRowBytes);
- numBytes += UVIntTool.numBytes(numFamilyBytes);
- numBytes += UVIntTool.numBytes(numQualifierBytes);
- numBytes += UVIntTool.numBytes(numTimestampBytes);
- numBytes += UVIntTool.numBytes(numMvccVersionBytes);
- numBytes += UVIntTool.numBytes(numValueBytes);
-
- numBytes += UVIntTool.numBytes(nextNodeOffsetWidth);
- numBytes += UVIntTool.numBytes(familyOffsetWidth);
- numBytes += UVIntTool.numBytes(qualifierOffsetWidth);
- numBytes += UVIntTool.numBytes(timestampIndexWidth);
- numBytes += UVIntTool.numBytes(mvccVersionIndexWidth);
- numBytes += UVIntTool.numBytes(valueOffsetWidth);
- numBytes += UVIntTool.numBytes(valueLengthWidth);
-
- numBytes += UVIntTool.numBytes(rowTreeDepth);
- numBytes += UVIntTool.numBytes(maxRowLength);
- numBytes += UVIntTool.numBytes(maxQualifierLength);
-
- numBytes += UVLongTool.numBytes(minTimestamp);
- numBytes += UVIntTool.numBytes(timestampDeltaWidth);
- numBytes += UVLongTool.numBytes(minMvccVersion);
- numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth);
- ++numBytes;//os.write(getAllSameTypeByte());
- ++numBytes;//os.write(allTypes);
-
- numBytes += UVIntTool.numBytes(numUniqueRows);
- numBytes += UVIntTool.numBytes(numUniqueFamilies);
- numBytes += UVIntTool.numBytes(numUniqueQualifiers);
- return numBytes;
- }
-
- public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{
- UVIntTool.writeBytes(version, os);
- UVIntTool.writeBytes(numMetaBytes, os);
- UVIntTool.writeBytes(numKeyValueBytes, os);
- os.write(getIncludesMvccVersionByte());
-
- UVIntTool.writeBytes(numRowBytes, os);
- UVIntTool.writeBytes(numFamilyBytes, os);
- UVIntTool.writeBytes(numQualifierBytes, os);
- UVIntTool.writeBytes(numTimestampBytes, os);
- UVIntTool.writeBytes(numMvccVersionBytes, os);
- UVIntTool.writeBytes(numValueBytes, os);
-
- UVIntTool.writeBytes(nextNodeOffsetWidth, os);
- UVIntTool.writeBytes(familyOffsetWidth, os);
- UVIntTool.writeBytes(qualifierOffsetWidth, os);
- UVIntTool.writeBytes(timestampIndexWidth, os);
- UVIntTool.writeBytes(mvccVersionIndexWidth, os);
- UVIntTool.writeBytes(valueOffsetWidth, os);
- UVIntTool.writeBytes(valueLengthWidth, os);
-
- UVIntTool.writeBytes(rowTreeDepth, os);
- UVIntTool.writeBytes(maxRowLength, os);
- UVIntTool.writeBytes(maxQualifierLength, os);
-
- UVLongTool.writeBytes(minTimestamp, os);
- UVIntTool.writeBytes(timestampDeltaWidth, os);
- UVLongTool.writeBytes(minMvccVersion, os);
- UVIntTool.writeBytes(mvccVersionDeltaWidth, os);
- os.write(getAllSameTypeByte());
- os.write(allTypes);
-
- UVIntTool.writeBytes(numUniqueRows, os);
- UVIntTool.writeBytes(numUniqueFamilies, os);
- UVIntTool.writeBytes(numUniqueQualifiers, os);
- }
-
- public void readVariableBytesFromInputStream(InputStream is) throws IOException{
- version = UVIntTool.getInt(is);
- numMetaBytes = UVIntTool.getInt(is);
- numKeyValueBytes = UVIntTool.getInt(is);
- setIncludesMvccVersion((byte) is.read());
-
- numRowBytes = UVIntTool.getInt(is);
- numFamilyBytes = UVIntTool.getInt(is);
- numQualifierBytes = UVIntTool.getInt(is);
- numTimestampBytes = UVIntTool.getInt(is);
- numMvccVersionBytes = UVIntTool.getInt(is);
- numValueBytes = UVIntTool.getInt(is);
-
- nextNodeOffsetWidth = UVIntTool.getInt(is);
- familyOffsetWidth = UVIntTool.getInt(is);
- qualifierOffsetWidth = UVIntTool.getInt(is);
- timestampIndexWidth = UVIntTool.getInt(is);
- mvccVersionIndexWidth = UVIntTool.getInt(is);
- valueOffsetWidth = UVIntTool.getInt(is);
- valueLengthWidth = UVIntTool.getInt(is);
-
- rowTreeDepth = UVIntTool.getInt(is);
- maxRowLength = UVIntTool.getInt(is);
- maxQualifierLength = UVIntTool.getInt(is);
-
- minTimestamp = UVLongTool.getLong(is);
- timestampDeltaWidth = UVIntTool.getInt(is);
- minMvccVersion = UVLongTool.getLong(is);
- mvccVersionDeltaWidth = UVIntTool.getInt(is);
-
- setAllSameType((byte) is.read());
- allTypes = (byte) is.read();
-
- numUniqueRows = UVIntTool.getInt(is);
- numUniqueFamilies = UVIntTool.getInt(is);
- numUniqueQualifiers = UVIntTool.getInt(is);
- }
-
- public void readVariableBytesFromArray(byte[] bytes, int offset) {
- int position = offset;
-
- version = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(version);
- numMetaBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numMetaBytes);
- numKeyValueBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numKeyValueBytes);
- setIncludesMvccVersion(bytes[position]);
- ++position;
-
- numRowBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numRowBytes);
- numFamilyBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numFamilyBytes);
- numQualifierBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numQualifierBytes);
- numTimestampBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numTimestampBytes);
- numMvccVersionBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numMvccVersionBytes);
- numValueBytes = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numValueBytes);
-
- nextNodeOffsetWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(nextNodeOffsetWidth);
- familyOffsetWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(familyOffsetWidth);
- qualifierOffsetWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(qualifierOffsetWidth);
- timestampIndexWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(timestampIndexWidth);
- mvccVersionIndexWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(mvccVersionIndexWidth);
- valueOffsetWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(valueOffsetWidth);
- valueLengthWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(valueLengthWidth);
-
- rowTreeDepth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(rowTreeDepth);
- maxRowLength = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(maxRowLength);
- maxQualifierLength = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(maxQualifierLength);
-
- minTimestamp = UVLongTool.getLong(bytes, position);
- position += UVLongTool.numBytes(minTimestamp);
- timestampDeltaWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(timestampDeltaWidth);
- minMvccVersion = UVLongTool.getLong(bytes, position);
- position += UVLongTool.numBytes(minMvccVersion);
- mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(mvccVersionDeltaWidth);
-
- setAllSameType(bytes[position]);
- ++position;
- allTypes = bytes[position];
- ++position;
-
- numUniqueRows = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numUniqueRows);
- numUniqueFamilies = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numUniqueFamilies);
- numUniqueQualifiers = UVIntTool.getInt(bytes, position);
- position += UVIntTool.numBytes(numUniqueQualifiers);
- }
-
- //TODO method that can read directly from ByteBuffer instead of InputStream
-
-
- /*************** methods *************************/
-
- public int getKeyValueTypeWidth() {
- return allSameType ? 0 : 1;
- }
-
- public byte getIncludesMvccVersionByte() {
- return includesMvccVersion ? (byte) 1 : (byte) 0;
- }
-
- public void setIncludesMvccVersion(byte includesMvccVersionByte) {
- includesMvccVersion = includesMvccVersionByte != 0;
- }
-
- public byte getAllSameTypeByte() {
- return allSameType ? (byte) 1 : (byte) 0;
- }
-
- public void setAllSameType(byte allSameTypeByte) {
- allSameType = allSameTypeByte != 0;
- }
-
- public boolean isAllSameTimestamp() {
- return timestampIndexWidth == 0;
- }
-
- public boolean isAllSameMvccVersion() {
- return mvccVersionIndexWidth == 0;
- }
-
- public void setTimestampFields(LongEncoder encoder){
- this.minTimestamp = encoder.getMin();
- this.timestampIndexWidth = encoder.getBytesPerIndex();
- this.timestampDeltaWidth = encoder.getBytesPerDelta();
- this.numTimestampBytes = encoder.getTotalCompressedBytes();
- }
-
- public void setMvccVersionFields(LongEncoder encoder){
- this.minMvccVersion = encoder.getMin();
- this.mvccVersionIndexWidth = encoder.getBytesPerIndex();
- this.mvccVersionDeltaWidth = encoder.getBytesPerDelta();
- this.numMvccVersionBytes = encoder.getTotalCompressedBytes();
- }
-
-
- /*************** Object methods *************************/
-
- /**
- * Generated by Eclipse
- */
- @Override
- public boolean equals(Object obj) {
- if (this == obj)
- return true;
- if (obj == null)
- return false;
- if (getClass() != obj.getClass())
- return false;
- PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj;
- if (allSameType != other.allSameType)
- return false;
- if (allTypes != other.allTypes)
- return false;
- if (arrayOffset != other.arrayOffset)
- return false;
- if (bufferOffset != other.bufferOffset)
- return false;
- if (valueLengthWidth != other.valueLengthWidth)
- return false;
- if (valueOffsetWidth != other.valueOffsetWidth)
- return false;
- if (familyOffsetWidth != other.familyOffsetWidth)
- return false;
- if (includesMvccVersion != other.includesMvccVersion)
- return false;
- if (maxQualifierLength != other.maxQualifierLength)
- return false;
- if (maxRowLength != other.maxRowLength)
- return false;
- if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth)
- return false;
- if (mvccVersionIndexWidth != other.mvccVersionIndexWidth)
- return false;
- if (minMvccVersion != other.minMvccVersion)
- return false;
- if (minTimestamp != other.minTimestamp)
- return false;
- if (nextNodeOffsetWidth != other.nextNodeOffsetWidth)
- return false;
- if (numValueBytes != other.numValueBytes)
- return false;
- if (numFamilyBytes != other.numFamilyBytes)
- return false;
- if (numMvccVersionBytes != other.numMvccVersionBytes)
- return false;
- if (numMetaBytes != other.numMetaBytes)
- return false;
- if (numQualifierBytes != other.numQualifierBytes)
- return false;
- if (numRowBytes != other.numRowBytes)
- return false;
- if (numTimestampBytes != other.numTimestampBytes)
- return false;
- if (numUniqueFamilies != other.numUniqueFamilies)
- return false;
- if (numUniqueQualifiers != other.numUniqueQualifiers)
- return false;
- if (numUniqueRows != other.numUniqueRows)
- return false;
- if (numKeyValueBytes != other.numKeyValueBytes)
- return false;
- if (qualifierOffsetWidth != other.qualifierOffsetWidth)
- return false;
- if (rowTreeDepth != other.rowTreeDepth)
- return false;
- if (timestampDeltaWidth != other.timestampDeltaWidth)
- return false;
- if (timestampIndexWidth != other.timestampIndexWidth)
- return false;
- if (version != other.version)
- return false;
- return true;
- }
-
- /**
- * Generated by Eclipse
- */
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + (allSameType ? 1231 : 1237);
- result = prime * result + allTypes;
- result = prime * result + arrayOffset;
- result = prime * result + bufferOffset;
- result = prime * result + valueLengthWidth;
- result = prime * result + valueOffsetWidth;
- result = prime * result + familyOffsetWidth;
- result = prime * result + (includesMvccVersion ? 1231 : 1237);
- result = prime * result + maxQualifierLength;
- result = prime * result + maxRowLength;
- result = prime * result + mvccVersionDeltaWidth;
- result = prime * result + mvccVersionIndexWidth;
- result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32));
- result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32));
- result = prime * result + nextNodeOffsetWidth;
- result = prime * result + numValueBytes;
- result = prime * result + numFamilyBytes;
- result = prime * result + numMvccVersionBytes;
- result = prime * result + numMetaBytes;
- result = prime * result + numQualifierBytes;
- result = prime * result + numRowBytes;
- result = prime * result + numTimestampBytes;
- result = prime * result + numUniqueFamilies;
- result = prime * result + numUniqueQualifiers;
- result = prime * result + numUniqueRows;
- result = prime * result + numKeyValueBytes;
- result = prime * result + qualifierOffsetWidth;
- result = prime * result + rowTreeDepth;
- result = prime * result + timestampDeltaWidth;
- result = prime * result + timestampIndexWidth;
- result = prime * result + version;
- return result;
- }
-
- /**
- * Generated by Eclipse
- */
- @Override
- public String toString() {
- StringBuilder builder = new StringBuilder();
- builder.append("PtBlockMeta [arrayOffset=");
- builder.append(arrayOffset);
- builder.append(", bufferOffset=");
- builder.append(bufferOffset);
- builder.append(", version=");
- builder.append(version);
- builder.append(", numMetaBytes=");
- builder.append(numMetaBytes);
- builder.append(", numKeyValueBytes=");
- builder.append(numKeyValueBytes);
- builder.append(", includesMvccVersion=");
- builder.append(includesMvccVersion);
- builder.append(", numRowBytes=");
- builder.append(numRowBytes);
- builder.append(", numFamilyBytes=");
- builder.append(numFamilyBytes);
- builder.append(", numQualifierBytes=");
- builder.append(numQualifierBytes);
- builder.append(", numTimestampBytes=");
- builder.append(numTimestampBytes);
- builder.append(", numMvccVersionBytes=");
- builder.append(numMvccVersionBytes);
- builder.append(", numValueBytes=");
- builder.append(numValueBytes);
- builder.append(", nextNodeOffsetWidth=");
- builder.append(nextNodeOffsetWidth);
- builder.append(", familyOffsetWidth=");
- builder.append(familyOffsetWidth);
- builder.append(", qualifierOffsetWidth=");
- builder.append(qualifierOffsetWidth);
- builder.append(", timestampIndexWidth=");
- builder.append(timestampIndexWidth);
- builder.append(", mvccVersionIndexWidth=");
- builder.append(mvccVersionIndexWidth);
- builder.append(", valueOffsetWidth=");
- builder.append(valueOffsetWidth);
- builder.append(", valueLengthWidth=");
- builder.append(valueLengthWidth);
- builder.append(", rowTreeDepth=");
- builder.append(rowTreeDepth);
- builder.append(", maxRowLength=");
- builder.append(maxRowLength);
- builder.append(", maxQualifierLength=");
- builder.append(maxQualifierLength);
- builder.append(", minTimestamp=");
- builder.append(minTimestamp);
- builder.append(", timestampDeltaWidth=");
- builder.append(timestampDeltaWidth);
- builder.append(", minMvccVersion=");
- builder.append(minMvccVersion);
- builder.append(", mvccVersionDeltaWidth=");
- builder.append(mvccVersionDeltaWidth);
- builder.append(", allSameType=");
- builder.append(allSameType);
- builder.append(", allTypes=");
- builder.append(allTypes);
- builder.append(", numUniqueRows=");
- builder.append(numUniqueRows);
- builder.append(", numUniqueFamilies=");
- builder.append(numUniqueFamilies);
- builder.append(", numUniqueQualifiers=");
- builder.append(numUniqueQualifiers);
- builder.append("]");
- return builder.toString();
- }
-
-
- /************** absolute getters *******************/
-
- public int getAbsoluteMetaOffset() {
- return arrayOffset + bufferOffset;
- }
-
- public int getAbsoluteRowOffset() {
- return getAbsoluteMetaOffset() + numMetaBytes;
- }
-
- public int getAbsoluteFamilyOffset() {
- return getAbsoluteRowOffset() + numRowBytes;
- }
-
- public int getAbsoluteQualifierOffset() {
- return getAbsoluteFamilyOffset() + numFamilyBytes;
- }
-
- public int getAbsoluteTimestampOffset() {
- return getAbsoluteQualifierOffset() + numQualifierBytes;
- }
-
- public int getAbsoluteMvccVersionOffset() {
- return getAbsoluteTimestampOffset() + numTimestampBytes;
- }
-
- public int getAbsoluteValueOffset() {
- return getAbsoluteMvccVersionOffset() + numMvccVersionBytes;
- }
-
-
- /*************** get/set ***************************/
-
- public int getTimestampDeltaWidth() {
- return timestampDeltaWidth;
- }
-
- public void setTimestampDeltaWidth(int timestampDeltaWidth) {
- this.timestampDeltaWidth = timestampDeltaWidth;
- }
-
- public int getValueOffsetWidth() {
- return valueOffsetWidth;
- }
-
- public void setValueOffsetWidth(int dataOffsetWidth) {
- this.valueOffsetWidth = dataOffsetWidth;
- }
-
- public int getValueLengthWidth() {
- return valueLengthWidth;
- }
-
- public void setValueLengthWidth(int dataLengthWidth) {
- this.valueLengthWidth = dataLengthWidth;
- }
-
- public int getMaxRowLength() {
- return maxRowLength;
- }
-
- public void setMaxRowLength(int maxRowLength) {
- this.maxRowLength = maxRowLength;
- }
-
- public long getMinTimestamp() {
- return minTimestamp;
- }
-
- public void setMinTimestamp(long minTimestamp) {
- this.minTimestamp = minTimestamp;
- }
-
- public byte getAllTypes() {
- return allTypes;
- }
-
- public void setAllTypes(byte allTypes) {
- this.allTypes = allTypes;
- }
-
- public boolean isAllSameType() {
- return allSameType;
- }
-
- public void setAllSameType(boolean allSameType) {
- this.allSameType = allSameType;
- }
-
- public int getNextNodeOffsetWidth() {
- return nextNodeOffsetWidth;
- }
-
- public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) {
- this.nextNodeOffsetWidth = nextNodeOffsetWidth;
- }
-
- public int getNumRowBytes() {
- return numRowBytes;
- }
-
- public void setNumRowBytes(int numRowBytes) {
- this.numRowBytes = numRowBytes;
- }
-
- public int getNumTimestampBytes() {
- return numTimestampBytes;
- }
-
- public void setNumTimestampBytes(int numTimestampBytes) {
- this.numTimestampBytes = numTimestampBytes;
- }
-
- public int getNumValueBytes() {
- return numValueBytes;
- }
-
- public void setNumValueBytes(int numValueBytes) {
- this.numValueBytes = numValueBytes;
- }
-
- public int getNumMetaBytes() {
- return numMetaBytes;
- }
-
- public void setNumMetaBytes(int numMetaBytes) {
- this.numMetaBytes = numMetaBytes;
- }
-
- public int getArrayOffset() {
- return arrayOffset;
- }
-
- public void setArrayOffset(int arrayOffset) {
- this.arrayOffset = arrayOffset;
- }
-
- public int getBufferOffset() {
- return bufferOffset;
- }
-
- public void setBufferOffset(int bufferOffset) {
- this.bufferOffset = bufferOffset;
- }
-
- public int getNumKeyValueBytes() {
- return numKeyValueBytes;
- }
-
- public void setNumKeyValueBytes(int numKeyValueBytes) {
- this.numKeyValueBytes = numKeyValueBytes;
- }
-
- public int getRowTreeDepth() {
- return rowTreeDepth;
- }
-
- public void setRowTreeDepth(int rowTreeDepth) {
- this.rowTreeDepth = rowTreeDepth;
- }
-
- public int getNumMvccVersionBytes() {
- return numMvccVersionBytes;
- }
-
- public void setNumMvccVersionBytes(int numMvccVersionBytes) {
- this.numMvccVersionBytes = numMvccVersionBytes;
- }
-
- public int getMvccVersionDeltaWidth() {
- return mvccVersionDeltaWidth;
- }
-
- public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) {
- this.mvccVersionDeltaWidth = mvccVersionDeltaWidth;
- }
-
- public long getMinMvccVersion() {
- return minMvccVersion;
- }
-
- public void setMinMvccVersion(long minMvccVersion) {
- this.minMvccVersion = minMvccVersion;
- }
-
- public int getNumFamilyBytes() {
- return numFamilyBytes;
- }
-
- public void setNumFamilyBytes(int numFamilyBytes) {
- this.numFamilyBytes = numFamilyBytes;
- }
-
- public int getFamilyOffsetWidth() {
- return familyOffsetWidth;
- }
-
- public void setFamilyOffsetWidth(int familyOffsetWidth) {
- this.familyOffsetWidth = familyOffsetWidth;
- }
-
- public int getNumUniqueRows() {
- return numUniqueRows;
- }
-
- public void setNumUniqueRows(int numUniqueRows) {
- this.numUniqueRows = numUniqueRows;
- }
-
- public int getNumUniqueFamilies() {
- return numUniqueFamilies;
- }
-
- public void setNumUniqueFamilies(int numUniqueFamilies) {
- this.numUniqueFamilies = numUniqueFamilies;
- }
-
- public int getNumUniqueQualifiers() {
- return numUniqueQualifiers;
- }
-
- public void setNumUniqueQualifiers(int numUniqueQualifiers) {
- this.numUniqueQualifiers = numUniqueQualifiers;
- }
-
- public int getNumQualifierBytes() {
- return numQualifierBytes;
- }
-
- public void setNumQualifierBytes(int numQualifierBytes) {
- this.numQualifierBytes = numQualifierBytes;
- }
-
- public int getQualifierOffsetWidth() {
- return qualifierOffsetWidth;
- }
-
- public void setQualifierOffsetWidth(int qualifierOffsetWidth) {
- this.qualifierOffsetWidth = qualifierOffsetWidth;
- }
-
- public int getMaxQualifierLength() {
- return maxQualifierLength;
- }
-
- public void setMaxQualifierLength(int maxQualifierLength) {
- this.maxQualifierLength = maxQualifierLength;
- }
-
- public int getTimestampIndexWidth() {
- return timestampIndexWidth;
- }
-
- public void setTimestampIndexWidth(int timestampIndexWidth) {
- this.timestampIndexWidth = timestampIndexWidth;
- }
-
- public int getMvccVersionIndexWidth() {
- return mvccVersionIndexWidth;
- }
-
- public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) {
- this.mvccVersionIndexWidth = mvccVersionIndexWidth;
- }
-
- public int getVersion() {
- return version;
- }
-
- public void setVersion(int version) {
- this.version = version;
- }
-
- public boolean isIncludesMvccVersion() {
- return includesMvccVersion;
- }
-
- public void setIncludesMvccVersion(boolean includesMvccVersion) {
- this.includesMvccVersion = includesMvccVersion;
- }
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java
deleted file mode 100644
index d6a80b2..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValue.KeyComparator;
-import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator;
-import org.apache.hadoop.hbase.KeyValue.RootKeyComparator;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
-import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
-import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
-import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
-import org.apache.hadoop.hbase.io.hfile.BlockType;
-import org.apache.hadoop.hbase.util.ByteBufferUtils;
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
-import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
-import org.apache.hbase.codec.prefixtree.encode.EncoderFactory;
-import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
-import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
-
-/**
- * This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or
- * package changes.
- *
- * PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point
- * for PrefixTree encoding and decoding. Encoding is delegated to instances of
- * {@link PrefixTreeEncoder}, and decoding is delegated to instances of
- * {@link org.apache.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder instances are
- * created and recycled by static PtEncoderFactory and PtDecoderFactory.
- */
-@InterfaceAudience.Private
-public class PrefixTreeCodec implements DataBlockEncoder{
-
- /**
- * no-arg constructor for reflection
- */
- public PrefixTreeCodec() {
- }
-
- /**
- * Copied from BufferedDataBlockEncoder. Almost definitely can be improved, but i'm not familiar
- * enough with the concept of the HFileBlockEncodingContext.
- */
- @Override
- public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion,
- HFileBlockEncodingContext blkEncodingCtx) throws IOException {
- if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
- throw new IOException(this.getClass().getName() + " only accepts "
- + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context.");
- }
-
- HFileBlockDefaultEncodingContext encodingCtx
- = (HFileBlockDefaultEncodingContext) blkEncodingCtx;
- encodingCtx.prepareEncoding();
- DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder();
- internalEncodeKeyValues(dataOut, in, includesMvccVersion);
-
- //do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
- if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
- encodingCtx.postEncoding(BlockType.ENCODED_DATA);
- } else {
- encodingCtx.postEncoding(BlockType.DATA);
- }
- }
-
- private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
- ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException {
- rawKeyValues.rewind();
- PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);
-
- try{
- KeyValue kv;
- while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) {
- builder.write(kv);
- }
- builder.flush();
- }finally{
- EncoderFactory.checkIn(builder);
- }
- }
-
-
- @Override
- public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion)
- throws IOException {
- return decodeKeyValues(source, 0, 0, includesMvccVersion);
- }
-
-
- /**
- * I don't think this method is called during normal HBase operation, so efficiency is not
- * important.
- */
- @Override
- public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
- int skipLastBytes, boolean includesMvccVersion) throws IOException {
- ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste
- sourceAsBuffer.mark();
- PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer);
- sourceAsBuffer.rewind();
- int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes();
- byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader];
- ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader);
- result.rewind();
- CellSearcher searcher = null;
- try {
- searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion);
- while (searcher.advance()) {
- KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current());
- // needs to be modified for DirectByteBuffers. no existing methods to
- // write VLongs to byte[]
- int offset = result.arrayOffset() + result.position();
- KeyValueUtil.appendToByteArray(currentCell, result.array(), offset);
- int keyValueLength = KeyValueUtil.length(currentCell);
- ByteBufferUtils.skip(result, keyValueLength);
- offset += keyValueLength;
- if (includesMvccVersion) {
- ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion());
- }
- }
- result.position(result.limit());//make it appear as if we were appending
- return result;
- } finally {
- DecoderFactory.checkIn(searcher);
- }
- }
-
-
- @Override
- public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
- block.rewind();
- PrefixTreeArraySearcher searcher = null;
- try {
- //should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
- searcher = DecoderFactory.checkOut(block, true);
- if (!searcher.positionAtFirstCell()) {
- return null;
- }
- return KeyValueUtil.copyKeyToNewByteBuffer(searcher.current());
- } finally {
- DecoderFactory.checkIn(searcher);
- }
- }
-
- @Override
- public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm,
- DataBlockEncoding encoding, byte[] header) {
- if(DataBlockEncoding.PREFIX_TREE != encoding){
- //i'm not sure why encoding is in the interface. Each encoder implementation should probably
- //know it's encoding type
- throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
- }
- return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header);
- }
-
- @Override
- public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) {
- return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
- }
-
- /**
- * Is this the correct handling of an illegal comparator? How to prevent that from getting all
- * the way to this point.
- */
- @Override
- public EncodedSeeker createSeeker(RawComparator comparator, boolean includesMvccVersion) {
- if(! (comparator instanceof KeyComparator)){
- throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator");
- }
- if(comparator instanceof MetaKeyComparator){
- throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META "
- +"table");
- }
- if(comparator instanceof RootKeyComparator){
- throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT "
- +"table");
- }
-
- return new PrefixTreeSeeker(includesMvccVersion);
- }
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java
deleted file mode 100644
index 85c6484..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java
+++ /dev/null
@@ -1,215 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
-import org.apache.hbase.codec.prefixtree.decode.DecoderFactory;
-import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
-import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition;
-
-/**
- * These methods have the same definition as any implementation of the EncodedSeeker.
- *
- * In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It
- * currently returns a new KeyValue object each time getKeyValue is called. This is not horrible,
- * but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in
- * the data from the PrefixTreeCell. It is somewhat heavyweight right now.
- */
-@InterfaceAudience.Private
-public class PrefixTreeSeeker implements EncodedSeeker {
-
- protected ByteBuffer block;
- protected boolean includeMvccVersion;
- protected PrefixTreeArraySearcher ptSearcher;
-
- public PrefixTreeSeeker(boolean includeMvccVersion) {
- this.includeMvccVersion = includeMvccVersion;
- }
-
- @Override
- public void setCurrentBuffer(ByteBuffer fullBlockBuffer) {
- block = fullBlockBuffer;
- ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion);
- rewind();
- }
-
- /**
- * Currently unused.
- *
- * TODO performance leak. should reuse the searchers. hbase does not currently have a hook where
- * this can be called
- */
- public void releaseCurrentSearcher(){
- DecoderFactory.checkIn(ptSearcher);
- }
-
-
- @Override
- public ByteBuffer getKeyDeepCopy() {
- return KeyValueUtil.copyKeyToNewByteBuffer(ptSearcher.current());
- }
-
-
- @Override
- public ByteBuffer getValueShallowCopy() {
- return CellUtil.getValueBufferShallowCopy(ptSearcher.current());
- }
-
- /**
- * currently must do deep copy into new array
- */
- @Override
- public ByteBuffer getKeyValueBuffer() {
- return KeyValueUtil.copyToNewByteBuffer(ptSearcher.current());
- }
-
- /**
- * currently must do deep copy into new array
- */
- @Override
- public KeyValue getKeyValue() {
- return KeyValueUtil.copyToNewKeyValue(ptSearcher.current());
- }
-
- /**
- * Currently unused.
- *
- * A nice, lightweight reference, though the underlying cell is transient. This method may return
- * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may
- * return a different reference for each Cell.
- *
- * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to
- * use this method instead of the getKeyValue() methods above.
- */
- public Cell get() {
- return ptSearcher.current();
- }
-
- @Override
- public void rewind() {
- ptSearcher.positionAtFirstCell();
- }
-
- @Override
- public boolean next() {
- return ptSearcher.advance();
- }
-
-// @Override
- public boolean advance() {
- return ptSearcher.advance();
- }
-
-
- private static final boolean USE_POSITION_BEFORE = false;
-
- /**
- * Seek forward only (should be called reseekToKeyInBlock?).
- *
- * If the exact key is found look at the seekBefore variable and:
- * - if true: go to the previous key if it's true
- * - if false: stay on the exact key
- *
- * If the exact key is not found, then go to the previous key *if possible*, but remember to
- * leave the scanner in a valid state if possible.
- *
- * @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker
- * @param offset offset into the keyOnlyBytes array
- * @param length number of bytes of the keyOnlyBytes array to use
- * @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up 1 Cell
- * @return 0 if the seeker is on the exact key
- * 1 if the seeker is not on the key for any reason, including seekBefore being true
- */
- @Override
- public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length,
- boolean forceBeforeOnExactMatch) {
- if (USE_POSITION_BEFORE) {
- return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length,
- forceBeforeOnExactMatch);
- }else{
- return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length,
- forceBeforeOnExactMatch);
- }
- }
-
-
-
- /*
- * Support both of these options since the underlying PrefixTree supports both. Possibly
- * expand the EncodedSeeker to utilize them both.
- */
-
- protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length,
- boolean forceBeforeOnExactMatch){
- // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
- KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);
-
- CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv);
-
- if(CellScannerPosition.AT == position){
- if (forceBeforeOnExactMatch) {
- ptSearcher.previous();
- return 1;
- }
- return 0;
- }
-
- return 1;
- }
-
-
- protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length,
- boolean forceBeforeOnExactMatch){
- // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell
- KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length);
-
- //should probably switch this to use the seekForwardToOrBefore method
- CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv);
-
- if(CellScannerPosition.AT == position){
- if (forceBeforeOnExactMatch) {
- ptSearcher.previous();
- return 1;
- }
- return 0;
-
- }
-
- if(CellScannerPosition.AFTER == position){
- if(!ptSearcher.isBeforeFirst()){
- ptSearcher.previous();
- }
- return 1;
- }
-
- if(position == CellScannerPosition.AFTER_LAST){
- return 1;
- }
-
- throw new RuntimeException("unexpected CellScannerPosition:"+position);
- }
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java
deleted file mode 100644
index 8f49ec1..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode;
-
-import java.nio.ByteBuffer;
-import java.util.Queue;
-import java.util.concurrent.LinkedBlockingQueue;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Pools PrefixTreeArraySearcher objects. Each Searcher can consist of hundreds or thousands of
- * objects and 1 is needed for each HFile during a Get operation. With tens of thousands of
- * Gets/second, reusing these searchers may save a lot of young gen collections.
- *
- * Alternative implementation would be a ByteBufferSearcherPool (not implemented yet).
- */
-@InterfaceAudience.Private
-public class ArraySearcherPool {
-
- /**
- * One decoder is needed for each storefile for each Get operation so we may need hundreds at the
- * same time, however, decoding is a CPU bound activity so should limit this to something in the
- * realm of maximum reasonable active threads.
- */
- private static final Integer MAX_POOL_SIZE = 1000;
-
- protected Queue pool
- = new LinkedBlockingQueue(MAX_POOL_SIZE);
-
- public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) {
- PrefixTreeArraySearcher searcher = pool.poll();//will return null if pool is empty
- searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion);
- return searcher;
- }
-
- public void checkIn(PrefixTreeArraySearcher searcher) {
- searcher.releaseBlockReference();
- pool.offer(searcher);
- }
-
- @Override
- public String toString() {
- return ("poolSize:" + pool.size());
- }
-
-}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java
deleted file mode 100644
index 7d98fde..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
-
-/**
- * Static wrapper class for the ArraySearcherPool.
- */
-@InterfaceAudience.Private
-public class DecoderFactory {
-
- private static final ArraySearcherPool POOL = new ArraySearcherPool();
-
- //TODO will need a PrefixTreeSearcher on top of CellSearcher
- public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer,
- boolean includeMvccVersion) {
- if (buffer.isDirect()) {
- throw new IllegalArgumentException("DirectByteBuffers not supported yet");
- // TODO implement PtByteBufferBlockScanner
- }
-
- PrefixTreeArraySearcher searcher = POOL.checkOut(buffer,
- includeMvccVersion);
- return searcher;
- }
-
- public static void checkIn(CellSearcher pSearcher) {
- if (pSearcher == null) {
- return;
- }
- if (! (pSearcher instanceof PrefixTreeArraySearcher)) {
- throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to "
- +DecoderFactory.class);
- }
- PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher;
- POOL.checkIn(searcher);
- }
-
-
- /**************************** helper ******************************/
-
- public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer,
- PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
- if (searcher == null) {
- PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer);
- searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(),
- blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength());
- searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
- return searcher;
- }
-
- PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta();
- blockMeta.initOnBlock(buffer);
- if (!searcher.areBuffersBigEnough()) {
- int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(),
- searcher.getMaxRowTreeStackNodes());
- int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
- int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(),
- searcher.getQualifierBufferLength());
- searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
- qualifierBufferLength);
- }
- //this is where we parse the BlockMeta
- searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
- return searcher;
- }
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java
deleted file mode 100644
index 1ce90e6..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.scanner.ReversibleCellScanner;
-
-/**
- * Methods for going backwards through a PrefixTree block. This class is split out on its own to
- * simplify the Scanner superclass and Searcher subclass.
- */
-@InterfaceAudience.Private
-public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements
- ReversibleCellScanner {
-
- /***************** construct ******************************/
-
- public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
- int rowBufferLength, int qualifierBufferLength) {
- super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
- }
-
-
- /***************** Object methods ***************************/
-
- @Override
- public boolean equals(Object obj) {
- //trivial override to confirm intent (findbugs)
- return super.equals(obj);
- }
-
-
- /***************** methods **********************************/
-
- @Override
- public boolean previous() {
- if (afterLast) {
- afterLast = false;
- positionAtLastCell();
- return true;
- }
- if (beforeFirst) {
- return false;
- }
- if (isFirstCellInRow()) {
- previousRowInternal();
- if (beforeFirst) {
- return false;
- }
- populateLastNonRowFields();
- return true;
- }
- populatePreviousNonRowFields();
- return true;
- }
-
- @Override
- public boolean previousRow(boolean endOfRow) {
- previousRowInternal();
- if(beforeFirst){
- return false;
- }
- if(endOfRow){
- populateLastNonRowFields();
- }else{
- populateFirstNonRowFields();
- }
- return true;
- }
-
- private boolean previousRowInternal() {
- if (beforeFirst) {
- return false;
- }
- if (afterLast) {
- positionAtLastRow();
- return true;
- }
- if (currentRowNode.hasOccurrences()) {
- discardCurrentRowNode(false);
- if(currentRowNode==null){
- return false;
- }
- }
- while (!beforeFirst) {
- if (isDirectlyAfterNub()) {//we are about to back up to the nub
- currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf
- nubCellsRemain = true;//this positions us on the nub
- return true;
- }
- if (currentRowNode.hasPreviousFanNodes()) {
- followPreviousFan();
- descendToLastRowFromCurrentPosition();
- } else {// keep going up the stack until we find previous fan positions
- discardCurrentRowNode(false);
- if(currentRowNode==null){
- return false;
- }
- }
- if (currentRowNode.hasOccurrences()) {// escape clause
- return true;// found some values
- }
- }
- return false;// went past the beginning
- }
-
- protected boolean isDirectlyAfterNub() {
- return currentRowNode.isNub() && currentRowNode.getFanIndex()==0;
- }
-
- protected void positionAtLastRow() {
- reInitFirstNode();
- descendToLastRowFromCurrentPosition();
- }
-
- protected void descendToLastRowFromCurrentPosition() {
- while (currentRowNode.hasChildren()) {
- followLastFan();
- }
- }
-
- protected void positionAtLastCell() {
- positionAtLastRow();
- populateLastNonRowFields();
- }
-
-}
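
Editor's note: the heart of the reversible scanner removed above is previousRowInternal(): back up the row-node stack until some node still has an earlier fan position, then descend to the last row under that position. A self-contained sketch of that previous-leaf-via-path-stack idea on a plain n-ary tree (all names hypothetical) is:

    import java.util.Deque;
    import java.util.List;

    /** Sketch only: previous-leaf navigation by backing up a path stack. */
    public class PreviousLeafSketch {

      static class Node {
        final List<Node> children;
        Node(List<Node> children) { this.children = children; }
        boolean isLeaf() { return children.isEmpty(); }
      }

      static class Step {
        final Node node;
        int childIndex;   // child we descended into; -1 when this step is the leaf itself
        Step(Node node, int childIndex) { this.node = node; this.childIndex = childIndex; }
      }

      /** Backs up until an earlier sibling exists, then descends to its last leaf. */
      static boolean previousLeaf(Deque<Step> path) {
        while (!path.isEmpty()) {
          Step top = path.peek();
          if (top.childIndex > 0) {            // an earlier fan position remains
            top.childIndex--;
            descendToLastLeaf(path, top.node.children.get(top.childIndex));
            return true;
          }
          path.pop();                          // keep going up the stack
        }
        return false;                          // went past the beginning
      }

      static void descendToLastLeaf(Deque<Step> path, Node node) {
        while (!node.isLeaf()) {
          int last = node.children.size() - 1;
          path.push(new Step(node, last));
          node = node.children.get(last);
        }
        path.push(new Step(node, -1));         // leaf ends up on top of the stack
      }
    }
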
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java
deleted file mode 100644
index 398bd5d..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java
+++ /dev/null
@@ -1,506 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellScanner;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader;
-import org.apache.hbase.codec.prefixtree.decode.row.RowNodeReader;
-import org.apache.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
-import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
-
-/**
- * Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and
- * call get/set methods.
- *
- * This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This
- * implementation requires that the bytes be in a normal java byte[] for performance. The
- * alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer
- * without copying the whole buffer on-heap.
- */
-@InterfaceAudience.Private
-public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner {
-
- /***************** fields ********************************/
-
- protected PrefixTreeBlockMeta blockMeta;
-
- protected boolean beforeFirst;
- protected boolean afterLast;
-
- protected RowNodeReader[] rowNodes;
- protected int rowNodeStackIndex;
-
- protected RowNodeReader currentRowNode;
- protected ColumnReader familyReader;
- protected ColumnReader qualifierReader;
- protected TimestampDecoder timestampDecoder;
- protected MvccVersionDecoder mvccVersionDecoder;
-
- protected boolean nubCellsRemain;
- protected int currentCellIndex;
-
-
- /*********************** construct ******************************/
-
- // pass in blockMeta so we can initialize buffers big enough for all cells in the block
- public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
- int rowBufferLength, int qualifierBufferLength) {
- this.rowNodes = new RowNodeReader[rowTreeDepth];
- for (int i = 0; i < rowNodes.length; ++i) {
- rowNodes[i] = new RowNodeReader();
- }
- this.rowBuffer = new byte[rowBufferLength];
- this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
- this.familyReader = new ColumnReader(familyBuffer, true);
- this.qualifierBuffer = new byte[qualifierBufferLength];
- this.qualifierReader = new ColumnReader(qualifierBuffer, false);
- this.timestampDecoder = new TimestampDecoder();
- this.mvccVersionDecoder = new MvccVersionDecoder();
- }
-
-
- /**************** init helpers ***************************************/
-
- /**
- * Call when first accessing a block.
- * @return entirely new scanner if false
- */
- public boolean areBuffersBigEnough() {
- if (rowNodes.length < blockMeta.getRowTreeDepth()) {
- return false;
- }
- if (rowBuffer.length < blockMeta.getMaxRowLength()) {
- return false;
- }
- if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
- return false;
- }
- return true;
- }
-
- public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block,
- boolean includeMvccVersion) {
- this.block = block;
- this.blockMeta = blockMeta;
- this.familyOffset = familyBuffer.length;
- this.familyReader.initOnBlock(blockMeta, block);
- this.qualifierOffset = qualifierBuffer.length;
- this.qualifierReader.initOnBlock(blockMeta, block);
- this.timestampDecoder.initOnBlock(blockMeta, block);
- this.mvccVersionDecoder.initOnBlock(blockMeta, block);
- this.includeMvccVersion = includeMvccVersion;
- resetToBeforeFirstEntry();
- }
-
- // Does this have to be in the CellScanner Interface? TODO
- public void resetToBeforeFirstEntry() {
- beforeFirst = true;
- afterLast = false;
- rowNodeStackIndex = -1;
- currentRowNode = null;
- rowLength = 0;
- familyOffset = familyBuffer.length;
- familyLength = 0;
- qualifierOffset = blockMeta.getMaxQualifierLength();
- qualifierLength = 0;
- nubCellsRemain = false;
- currentCellIndex = -1;
- timestamp = -1L;
- type = DEFAULT_TYPE;
- absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized
- valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length
- }
-
- /**
- * Call this before putting the scanner back into a pool so it doesn't hold the last used block
- * in memory.
- */
- public void releaseBlockReference(){
- block = null;
- }
-
-
- /********************** CellScanner **********************/
-
- @Override
- public Cell current() {
- if(isOutOfBounds()){
- return null;
- }
- return (Cell)this;
- }
-
- /******************* Object methods ************************/
-
- @Override
- public boolean equals(Object obj) {
- //trivial override to confirm intent (findbugs)
- return super.equals(obj);
- }
-
- @Override
- public int hashCode() {
- return super.hashCode();
- }
-
- /**
- * Override PrefixTreeCell.toString() with a check to see if the current cell is valid.
- */
- @Override
- public String toString() {
- Cell currentCell = current();
- if(currentCell==null){
- return "null";
- }
- return ((PrefixTreeCell)currentCell).getKeyValueString();
- }
-
-
- /******************* advance ***************************/
-
- public boolean positionAtFirstCell() {
- reInitFirstNode();
- return advance();
- }
-
- @Override
- public boolean advance() {
- if (afterLast) {
- return false;
- }
- if (!hasOccurrences()) {
- resetToBeforeFirstEntry();
- }
- if (beforeFirst || isLastCellInRow()) {
- nextRow();
- if (afterLast) {
- return false;
- }
- } else {
- ++currentCellIndex;
- }
-
- populateNonRowFields(currentCellIndex);
- return true;
- }
-
-
- public boolean nextRow() {
- nextRowInternal();
- if (afterLast) {
- return false;
- }
- populateNonRowFields(currentCellIndex);
- return true;
- }
-
-
- /**
- * This method is safe to call when the scanner is not on a fully valid row node, as in the case
- * of a row token miss in the Searcher
- * @return true if we are positioned on a valid row, false if past end of block
- */
- protected boolean nextRowInternal() {
- if (afterLast) {
- return false;
- }
- if (beforeFirst) {
- initFirstNode();
- if (currentRowNode.hasOccurrences()) {
- if (currentRowNode.isNub()) {
- nubCellsRemain = true;
- }
- currentCellIndex = 0;
- return true;
- }
- }
- if (currentRowNode.isLeaf()) {
- discardCurrentRowNode(true);
- }
- while (!afterLast) {
- if (nubCellsRemain) {
- nubCellsRemain = false;
- }
- if (currentRowNode.hasMoreFanNodes()) {
- followNextFan();
- if (currentRowNode.hasOccurrences()) {
- currentCellIndex = 0;
- return true;
- }// found some values
- } else {
- discardCurrentRowNode(true);
- }
- }
- return false;// went past the end
- }
-
-
- /**************** secondary traversal methods ******************************/
-
- protected void reInitFirstNode() {
- resetToBeforeFirstEntry();
- initFirstNode();
- }
-
- protected void initFirstNode() {
- int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset();
- rowNodeStackIndex = 0;
- currentRowNode = rowNodes[0];
- currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure);
- appendCurrentTokenToRowBuffer();
- beforeFirst = false;
- }
-
- protected void followFirstFan() {
- followFan(0);
- }
-
- protected void followPreviousFan() {
- int nextFanPosition = currentRowNode.getFanIndex() - 1;
- followFan(nextFanPosition);
- }
-
- protected void followCurrentFan() {
- int currentFanPosition = currentRowNode.getFanIndex();
- followFan(currentFanPosition);
- }
-
- protected void followNextFan() {
- int nextFanPosition = currentRowNode.getFanIndex() + 1;
- followFan(nextFanPosition);
- }
-
- protected void followLastFan() {
- followFan(currentRowNode.getLastFanIndex());
- }
-
- protected void followFan(int fanIndex) {
- currentRowNode.setFanIndex(fanIndex);
- appendToRowBuffer(currentRowNode.getFanByte(fanIndex));
-
- int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset()
- + currentRowNode.getNextNodeOffset(fanIndex, blockMeta);
- ++rowNodeStackIndex;
-
- currentRowNode = rowNodes[rowNodeStackIndex];
- currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure);
-
- //TODO getToken is spewing garbage
- appendCurrentTokenToRowBuffer();
- if (currentRowNode.isNub()) {
- nubCellsRemain = true;
- }
- currentCellIndex = 0;
- }
-
- /**
- * @param forwards which marker to set if we overflow
- */
- protected void discardCurrentRowNode(boolean forwards) {
- RowNodeReader rowNodeBeingPopped = currentRowNode;
- --rowNodeStackIndex;// pop it off the stack
- if (rowNodeStackIndex < 0) {
- currentRowNode = null;
- if (forwards) {
- markAfterLast();
- } else {
- markBeforeFirst();
- }
- return;
- }
- popFromRowBuffer(rowNodeBeingPopped);
- currentRowNode = rowNodes[rowNodeStackIndex];
- }
-
- protected void markBeforeFirst() {
- beforeFirst = true;
- afterLast = false;
- currentRowNode = null;
- }
-
- protected void markAfterLast() {
- beforeFirst = false;
- afterLast = true;
- currentRowNode = null;
- }
-
-
- /***************** helper methods **************************/
-
- protected void appendCurrentTokenToRowBuffer() {
- System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength,
- currentRowNode.getTokenLength());
- rowLength += currentRowNode.getTokenLength();
- }
-
- protected void appendToRowBuffer(byte b) {
- rowBuffer[rowLength] = b;
- ++rowLength;
- }
-
- protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) {
- rowLength -= rowNodeBeingPopped.getTokenLength();
- --rowLength; // pop the parent's fan byte
- }
-
- protected boolean hasOccurrences() {
- return currentRowNode != null && currentRowNode.hasOccurrences();
- }
-
- protected boolean isBranch() {
- return currentRowNode != null && !currentRowNode.hasOccurrences()
- && currentRowNode.hasChildren();
- }
-
- protected boolean isNub() {
- return currentRowNode != null && currentRowNode.hasOccurrences()
- && currentRowNode.hasChildren();
- }
-
- protected boolean isLeaf() {
- return currentRowNode != null && currentRowNode.hasOccurrences()
- && !currentRowNode.hasChildren();
- }
-
- //TODO expose this in a PrefixTreeScanner interface
- public boolean isBeforeFirst(){
- return beforeFirst;
- }
-
- public boolean isAfterLast(){
- return afterLast;
- }
-
- protected boolean isOutOfBounds(){
- return beforeFirst || afterLast;
- }
-
- protected boolean isFirstCellInRow() {
- return currentCellIndex == 0;
- }
-
- protected boolean isLastCellInRow() {
- return currentCellIndex == currentRowNode.getLastCellIndex();
- }
-
-
- /********************* fill in family/qualifier/ts/type/value ************/
-
- protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) {
- populateNonRowFields(cellNum);
- return CellComparator.compareStatic(this, key);
- }
-
- protected void populateFirstNonRowFields() {
- populateNonRowFields(0);
- }
-
- protected void populatePreviousNonRowFields() {
- populateNonRowFields(currentCellIndex - 1);
- }
-
- protected void populateLastNonRowFields() {
- populateNonRowFields(currentRowNode.getLastCellIndex());
- }
-
- protected void populateNonRowFields(int cellIndex) {
- currentCellIndex = cellIndex;
- populateFamily();
- populateQualifier();
- populateTimestamp();
- populateMvccVersion();
- populateType();
- populateValueOffsets();
- }
-
- protected void populateFamily() {
- int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta);
- familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset();
- familyLength = familyReader.getColumnLength();
- }
-
- protected void populateQualifier() {
- int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta);
- qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset();
- qualifierLength = qualifierReader.getColumnLength();
- }
-
- protected void populateTimestamp() {
- if (blockMeta.isAllSameTimestamp()) {
- timestamp = blockMeta.getMinTimestamp();
- } else {
- int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta);
- timestamp = timestampDecoder.getLong(timestampIndex);
- }
- }
-
- protected void populateMvccVersion() {
- if (blockMeta.isAllSameMvccVersion()) {
- mvccVersion = blockMeta.getMinMvccVersion();
- } else {
- int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex,
- blockMeta);
- mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex);
- }
- }
-
- protected void populateType() {
- int typeInt;
- if (blockMeta.isAllSameType()) {
- typeInt = blockMeta.getAllTypes();
- } else {
- typeInt = currentRowNode.getType(currentCellIndex, blockMeta);
- }
- type = PrefixTreeCell.TYPES[typeInt];
- }
-
- protected void populateValueOffsets() {
- int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta);
- absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection;
- valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
- }
-
-
- /**************** getters ***************************/
-
- public byte[] getTreeBytes() {
- return block;
- }
-
- public PrefixTreeBlockMeta getBlockMeta() {
- return blockMeta;
- }
-
- public int getMaxRowTreeStackNodes() {
- return rowNodes.length;
- }
-
- public int getRowBufferLength() {
- return rowBuffer.length;
- }
-
- public int getQualifierBufferLength() {
- return qualifierBuffer.length;
- }
-
-}
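
Editor's note: a detail worth calling out in the scanner removed above is areBuffersBigEnough(), which is what lets DecoderFactory recycle a pooled scanner; the scanner is reused only if its preallocated row and qualifier buffers cover the new block's maxima, otherwise a bigger one is built. A stripped-down, hypothetical version of that grow-only reuse check:

    /** Sketch of the grow-only buffer reuse check applied when recycling a scanner. */
    public class ReusableBuffers {

      private byte[] rowBuffer = new byte[0];
      private byte[] qualifierBuffer = new byte[0];

      /** True when the existing buffers already cover the new block's maxima. */
      public boolean bigEnoughFor(int maxRowLength, int maxQualifierLength) {
        return rowBuffer.length >= maxRowLength
            && qualifierBuffer.length >= maxQualifierLength;
      }

      /** Reallocate only on growth; the real code instead builds a bigger searcher. */
      public void ensureCapacity(int maxRowLength, int maxQualifierLength) {
        if (rowBuffer.length < maxRowLength) {
          rowBuffer = new byte[maxRowLength];
        }
        if (qualifierBuffer.length < maxQualifierLength) {
          qualifierBuffer = new byte[maxQualifierLength];
        }
      }
    }
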
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java
deleted file mode 100644
index 5201b6d..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition;
-import org.apache.hbase.codec.prefixtree.scanner.CellSearcher;
-
-import com.google.common.primitives.UnsignedBytes;
-
-/**
- * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to
- * position itself on a requested Cell without scanning through cells before it. The PrefixTree is
- * set up to be a Trie of rows, so finding a particular row is extremely cheap.
- *
- * Once it finds the row, it does a binary search through the cells inside the row, which is not as
- * fast as the trie search, but faster than iterating through every cell like existing block
- * formats do. For this reason, this implementation is targeted towards schemas where rows are
- * narrow enough to have several or many per block, and where you are generally looking for the
- * entire row or the first cell. It will still be fast for wide rows or point queries, but could
- * be improved upon.
- */
-@InterfaceAudience.Private
-public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements
- CellSearcher {
-
- /*************** construct ******************************/
-
- public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
- int rowBufferLength, int qualifierBufferLength) {
- super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
- }
-
-
- /********************* CellSearcher methods *******************/
-
- @Override
- public boolean positionAt(Cell key) {
- return CellScannerPosition.AT == positionAtOrAfter(key);
- }
-
- @Override
- public CellScannerPosition positionAtOrBefore(Cell key) {
- reInitFirstNode();
- int fanIndex = -1;
-
- while(true){
- //detect row mismatch. break loop if mismatch
- int currentNodeDepth = rowLength;
- int rowTokenComparison = compareToCurrentToken(key);
- if(rowTokenComparison != 0){
- return fixRowTokenMissReverse(rowTokenComparison);
- }
-
- //exact row found, move on to qualifier & ts
- if(rowMatchesAfterCurrentPosition(key)){
- return positionAtQualifierTimestamp(key, true);
- }
-
- //detect dead end (no fan to descend into)
- if(!currentRowNode.hasFan()){
- if(hasOccurrences()){//must be leaf or nub
- populateLastNonRowFields();
- return CellScannerPosition.BEFORE;
- }else{
- //TODO i don't think this case is exercised by any tests
- return fixRowFanMissReverse(0);
- }
- }
-
- //keep hunting for the rest of the row
- byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
- fanIndex = currentRowNode.whichFanNode(searchForByte);
- if(fanIndex < 0){//no matching row. return early
- int insertionPoint = -fanIndex;
- return fixRowFanMissReverse(insertionPoint);
- }
- //found a match, so dig deeper into the tree
- followFan(fanIndex);
- }
- }
-
- /**
- * Identical workflow to positionAtOrBefore, but kept separate to avoid ~10 extra
- * if-statements. Priority on readability and debuggability.
- */
- @Override
- public CellScannerPosition positionAtOrAfter(Cell key) {
- reInitFirstNode();
- int fanIndex = -1;
-
- while(true){
- //detect row mismatch. break loop if mismatch
- int currentNodeDepth = rowLength;
- int rowTokenComparison = compareToCurrentToken(key);
- if(rowTokenComparison != 0){
- return fixRowTokenMissForward(rowTokenComparison);
- }
-
- //exact row found, move on to qualifier & ts
- if(rowMatchesAfterCurrentPosition(key)){
- return positionAtQualifierTimestamp(key, false);
- }
-
- //detect dead end (no fan to descend into)
- if(!currentRowNode.hasFan()){
- if(hasOccurrences()){
- populateFirstNonRowFields();
- return CellScannerPosition.AFTER;
- }else{
- //TODO i don't think this case is exercised by any tests
- return fixRowFanMissForward(0);
- }
- }
-
- //keep hunting for the rest of the row
- byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth);
- fanIndex = currentRowNode.whichFanNode(searchForByte);
- if(fanIndex < 0){//no matching row. return early
- int insertionPoint = -fanIndex;
- return fixRowFanMissForward(insertionPoint);
- }
- //found a match, so dig deeper into the tree
- followFan(fanIndex);
- }
- }
-
- @Override
- public boolean seekForwardTo(Cell key) {
- if(currentPositionIsAfter(key)){
- //our position is after the requested key, so can't do anything
- return false;
- }
- return positionAt(key);
- }
-
- @Override
- public CellScannerPosition seekForwardToOrBefore(Cell key) {
- //Do we even need this check or should upper layers avoid this situation. It's relatively
- //expensive compared to the rest of the seek operation.
- if(currentPositionIsAfter(key)){
- //our position is after the requested key, so can't do anything
- return CellScannerPosition.AFTER;
- }
-
- return positionAtOrBefore(key);
- }
-
- @Override
- public CellScannerPosition seekForwardToOrAfter(Cell key) {
- //Do we even need this check or should upper layers avoid this situation. It's relatively
- //expensive compared to the rest of the seek operation.
- if(currentPositionIsAfter(key)){
- //our position is after the requested key, so can't do anything
- return CellScannerPosition.AFTER;
- }
-
- return positionAtOrAfter(key);
- }
-
- /**
- * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false
- */
- @Override
- public void positionAfterLastCell() {
- resetToBeforeFirstEntry();
- beforeFirst = false;
- afterLast = true;
- }
-
-
- /***************** Object methods ***************************/
-
- @Override
- public boolean equals(Object obj) {
- //trivial override to confirm intent (findbugs)
- return super.equals(obj);
- }
-
-
- /****************** internal methods ************************/
-
- protected boolean currentPositionIsAfter(Cell cell){
- return compareTo(cell) > 0;
- }
-
- protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) {
- int minIndex = 0;
- int maxIndex = currentRowNode.getLastCellIndex();
- int diff;
- while (true) {
- int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow
- diff = populateNonRowFieldsAndCompareTo(midIndex, key);
-
- if (diff == 0) {// found exact match
- return CellScannerPosition.AT;
- } else if (minIndex == maxIndex) {// even termination case
- break;
- } else if ((minIndex + 1) == maxIndex) {// odd termination case
- diff = populateNonRowFieldsAndCompareTo(maxIndex, key);
- if(diff > 0){
- diff = populateNonRowFieldsAndCompareTo(minIndex, key);
- }
- break;
- } else if (diff < 0) {// keep going forward
- minIndex = currentCellIndex;
- } else {// went past it, back up
- maxIndex = currentCellIndex;
- }
- }
-
- if (diff == 0) {
- return CellScannerPosition.AT;
-
- } else if (diff < 0) {// we are before key
- if (beforeOnMiss) {
- return CellScannerPosition.BEFORE;
- }
- if (advance()) {
- return CellScannerPosition.AFTER;
- }
- return CellScannerPosition.AFTER_LAST;
-
- } else {// we are after key
- if (!beforeOnMiss) {
- return CellScannerPosition.AFTER;
- }
- if (previous()) {
- return CellScannerPosition.BEFORE;
- }
- return CellScannerPosition.BEFORE_FIRST;
- }
- }
-
- /**
- * compare this.row to key.row but starting at the current rowLength
- * @param key Cell being searched for
- * @return true if row buffer contents match key.row
- */
- protected boolean rowMatchesAfterCurrentPosition(Cell key) {
- if (!currentRowNode.hasOccurrences()) {
- return false;
- }
- int thatRowLength = key.getRowLength();
- if (rowLength != thatRowLength) {
- return false;
- }
- return true;
- }
-
- // TODO move part of this to Cell comparator?
- /**
- * Compare only the bytes within the window of the current token
- * @param key the Cell being searched for
- * @return -1 if the key is less than (before) this, 0 if equal, and 1 if the key is after
- */
- protected int compareToCurrentToken(Cell key) {
- int startIndex = rowLength - currentRowNode.getTokenLength();
- int endIndexExclusive = startIndex + currentRowNode.getTokenLength();
- for (int i = startIndex; i < endIndexExclusive; ++i) {
- if (i >= key.getRowLength()) {// key was shorter, so it's first
- return -1;
- }
- byte keyByte = CellUtil.getRowByte(key, i);
- byte thisByte = rowBuffer[i];
- if (keyByte == thisByte) {
- continue;
- }
- return UnsignedBytes.compare(keyByte, thisByte);
- }
- return 0;
- }
-
- protected void followLastFansUntilExhausted(){
- while(currentRowNode.hasFan()){
- followLastFan();
- }
- }
-
-
- /****************** complete seek when token mismatch ******************/
-
- /**
- * @param searcherIsAfterInputKey <0: input key is before the searcher's position
- * >0: input key is after the searcher's position
- */
- protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) {
- if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up
- boolean foundPreviousRow = previousRow(true);
- if(foundPreviousRow){
- populateLastNonRowFields();
- return CellScannerPosition.BEFORE;
- }else{
- return CellScannerPosition.BEFORE_FIRST;
- }
-
- }else{//searcher position is before the input key
- if(currentRowNode.hasOccurrences()){
- populateFirstNonRowFields();
- return CellScannerPosition.BEFORE;
- }
- boolean foundNextRow = nextRow();
- if(foundNextRow){
- return CellScannerPosition.AFTER;
- }else{
- return CellScannerPosition.AFTER_LAST;
- }
- }
- }
-
- /**
- * @param searcherIsAfterInputKey <0: input key is before the searcher's position
- * >0: input key is after the searcher's position
- */
- protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) {
- if (searcherIsAfterInputKey < 0) {//searcher position is after the input key
- if(currentRowNode.hasOccurrences()){
- populateFirstNonRowFields();
- return CellScannerPosition.AFTER;
- }
- boolean foundNextRow = nextRow();
- if(foundNextRow){
- return CellScannerPosition.AFTER;
- }else{
- return CellScannerPosition.AFTER_LAST;
- }
-
- }else{//searcher position is before the input key, so go forward
- discardCurrentRowNode(true);
- boolean foundNextRow = nextRow();
- if(foundNextRow){
- return CellScannerPosition.AFTER;
- }else{
- return CellScannerPosition.AFTER_LAST;
- }
- }
- }
-
-
- /****************** complete seek when fan mismatch ******************/
-
- protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){
- if(fanInsertionPoint == 0){//we need to back up a row
- boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row
- if(foundPreviousRow){
- populateLastNonRowFields();
- return CellScannerPosition.BEFORE;
- }
- return CellScannerPosition.BEFORE_FIRST;
- }
-
- //follow the previous fan, but then descend recursively forward
- followFan(fanInsertionPoint - 1);
- followLastFansUntilExhausted();
- populateLastNonRowFields();
- return CellScannerPosition.BEFORE;
- }
-
- protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){
- if(fanInsertionPoint >= currentRowNode.getFanOut()){
- discardCurrentRowNode(true);
- if (!nextRow()) {
- return CellScannerPosition.AFTER_LAST;
- } else {
- return CellScannerPosition.AFTER;
- }
- }
-
- followFan(fanInsertionPoint);
- if(hasOccurrences()){
- populateFirstNonRowFields();
- return CellScannerPosition.AFTER;
- }
-
- if(nextRowInternal()){
- populateFirstNonRowFields();
- return CellScannerPosition.AFTER;
-
- }else{
- return CellScannerPosition.AFTER_LAST;
- }
- }
-
-}
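
Editor's note: once the searcher above has matched the row, positionAtQualifierTimestamp() binary-searches the row's cells and reports whether it landed AT the key or ended up BEFORE/AFTER it. The essence of that position-plus-relation search over any sorted list (simplified termination, hypothetical names) is:

    import java.util.List;

    /** Sketch: binary search that reports where it ended up relative to the key. */
    public class PositionalSearch {

      enum Position { AT, AFTER, AFTER_LAST }

      /**
       * Finds the first element >= key: AT on an exact hit, AFTER when a larger
       * element exists, AFTER_LAST when the key is past every element.
       */
      static <T extends Comparable<T>> Position positionAtOrAfter(List<T> sorted, T key) {
        int lo = 0, hi = sorted.size();        // search window [lo, hi)
        while (lo < hi) {
          int mid = (lo + hi) >>> 1;           // unsigned shift avoids overflow
          int cmp = sorted.get(mid).compareTo(key);
          if (cmp == 0) {
            return Position.AT;
          } else if (cmp < 0) {
            lo = mid + 1;                      // keep going forward
          } else {
            hi = mid;                          // went past it, back up
          }
        }
        return lo < sorted.size() ? Position.AFTER : Position.AFTER_LAST;
      }
    }
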
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java
deleted file mode 100644
index 5573c02..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.KeyValueUtil;
-
-/**
- * As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields
- * of this class so that Cell logic can be applied, but without allocating new memory for every Cell
- * iterated through.
- */
-@InterfaceAudience.Private
-public class PrefixTreeCell implements Cell, Comparable<Cell> {
-
- /********************** static **********************/
-
- public static final KeyValue.Type[] TYPES = new KeyValue.Type[256];
- static {
- for (KeyValue.Type type : KeyValue.Type.values()) {
- TYPES[type.getCode() & 0xff] = type;
- }
- }
-
- //Same as KeyValue constructor. Only used to avoid NPE's when full cell hasn't been initialized.
- public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put;
-
- /******************** fields ************************/
-
- protected byte[] block;
- //we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler
- protected boolean includeMvccVersion;
-
- protected byte[] rowBuffer;
- protected int rowLength;
-
- protected byte[] familyBuffer;
- protected int familyOffset;
- protected int familyLength;
-
- protected byte[] qualifierBuffer;// aligned to the end of the array
- protected int qualifierOffset;
- protected int qualifierLength;
-
- protected Long timestamp;
- protected Long mvccVersion;
-
- protected KeyValue.Type type;
-
- protected int absoluteValueOffset;
- protected int valueLength;
-
-
- /********************** Cell methods ******************/
-
- /**
- * For debugging. Currently creates new KeyValue to utilize its toString() method.
- */
- @Override
- public String toString() {
- return getKeyValueString();
- }
-
- @Override
- public boolean equals(Object obj) {
- if (!(obj instanceof Cell)) {
- return false;
- }
- //Temporary hack to maintain backwards compatibility with KeyValue.equals
- return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj);
-
- //TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907
- }
-
- @Override
- public int hashCode(){
- //Temporary hack to maintain backwards compatibility with KeyValue.hashCode
- //I don't think this is used in any hot code paths
- return KeyValueUtil.copyToNewKeyValue(this).hashCode();
-
- //TODO return CellComparator.hashCode(this);//see HBASE-6907
- }
-
- @Override
- public int compareTo(Cell other) {
- return CellComparator.compareStatic(this, other);
- }
-
- @Override
- public long getTimestamp() {
- return timestamp;
- }
-
- @Override
- public long getMvccVersion() {
- if (!includeMvccVersion) {
- return 0L;
- }
- return mvccVersion;
- }
-
- @Override
- public int getValueLength() {
- return valueLength;
- }
-
- @Override
- public byte[] getRowArray() {
- return rowBuffer;
- }
-
- @Override
- public int getRowOffset() {
- return 0;
- }
-
- @Override
- public short getRowLength() {
- return (short) rowLength;
- }
-
- @Override
- public byte[] getFamilyArray() {
- return familyBuffer;
- }
-
- @Override
- public int getFamilyOffset() {
- return familyOffset;
- }
-
- @Override
- public byte getFamilyLength() {
- return (byte) familyLength;
- }
-
- @Override
- public byte[] getQualifierArray() {
- return qualifierBuffer;
- }
-
- @Override
- public int getQualifierOffset() {
- return qualifierOffset;
- }
-
- @Override
- public int getQualifierLength() {
- return qualifierLength;
- }
-
- @Override
- public byte[] getValueArray() {
- return block;
- }
-
- @Override
- public int getValueOffset() {
- return absoluteValueOffset;
- }
-
- @Override
- public byte getTypeByte() {
- return type.getCode();
- }
-
-
- /************************* helper methods *************************/
-
- /**
- * Need this separate method so we can call it from subclasses' toString() methods
- */
- protected String getKeyValueString(){
- KeyValue kv = KeyValueUtil.copyToNewKeyValue(this);
- return kv.toString();
- }
-
-}
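
Editor's note: PrefixTreeCell, removed above, is a flyweight; the scanner repoints its offset/length fields into shared buffers instead of allocating a cell per step, so a caller that needs to keep a cell beyond the next advance must copy it first. The reuse pattern in isolation, with hypothetical names:

    import java.util.Arrays;

    /** Sketch of a reusable flyweight view over shared byte buffers. */
    public class ReusableView {

      private byte[] buffer;   // shared backing array, owned by the scanner
      private int offset;
      private int length;

      /** The scanner repoints the view instead of allocating a new object. */
      void pointAt(byte[] buffer, int offset, int length) {
        this.buffer = buffer;
        this.offset = offset;
        this.length = length;
      }

      /** Callers that keep data past the next advance() must copy it out. */
      public byte[] copyBytes() {
        return Arrays.copyOfRange(buffer, offset, offset + length);
      }
    }
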
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java
deleted file mode 100644
index 1623876..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode.column;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.util.vint.UFIntTool;
-import org.apache.hbase.util.vint.UVIntTool;
-
-@InterfaceAudience.Private
-public class ColumnNodeReader {
-
- /**************** fields ************************/
-
- protected PrefixTreeBlockMeta blockMeta;
- protected byte[] block;
-
- protected byte[] columnBuffer;
- protected boolean familyVsQualifier;
-
- protected int offsetIntoBlock;
-
- protected int tokenOffsetIntoBlock;
- protected int tokenLength;
- protected int parentStartPosition;
-
-
- /************** construct *************************/
-
- public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) {
- this.columnBuffer = columnBuffer;
- this.familyVsQualifier = familyVsQualifier;
- }
-
- public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
- this.blockMeta = blockMeta;
- this.block = block;
- }
-
-
- /************* methods *****************************/
-
- public void positionAt(int offsetIntoBlock) {
- this.offsetIntoBlock = offsetIntoBlock;
- tokenLength = UVIntTool.getInt(block, offsetIntoBlock);
- tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
- int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength;
- int offsetWidth;
- if (familyVsQualifier) {
- offsetWidth = blockMeta.getFamilyOffsetWidth();
- } else {
- offsetWidth = blockMeta.getQualifierOffsetWidth();
- }
- parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth);
- }
-
- public void prependTokenToBuffer(int bufferStartIndex) {
- System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength);
- }
-
- public boolean isRoot() {
- if (familyVsQualifier) {
- return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset();
- } else {
- return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset();
- }
- }
-
-
- /************** standard methods *********************/
-
- @Override
- public String toString() {
- return super.toString() + "[" + offsetIntoBlock + "]";
- }
-
-
- /****************** get/set ****************************/
-
- public int getTokenLength() {
- return tokenLength;
- }
-
- public int getParentStartPosition() {
- return parentStartPosition;
- }
-
-}
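
Editor's note: positionAt() in the reader above first reads the token length as an unsigned varint (UVIntTool), then a fixed-width parent pointer (UFIntTool). The exact byte layout of those tools isn't reproduced here; purely as an illustration of the idea, a common little-endian 7-bits-per-byte varint decoder looks like:

    /** Sketch of a little-endian, 7-bits-per-byte unsigned varint decoder. */
    public class VarIntSketch {

      /** Decodes an unsigned int; the high bit of each byte flags a continuation. */
      static int readUVarInt(byte[] block, int offset) {
        int value = 0;
        int shift = 0;
        while (true) {
          int b = block[offset++] & 0xff;
          value |= (b & 0x7f) << shift;
          if ((b & 0x80) == 0) {               // no continuation bit: last byte
            return value;
          }
          shift += 7;
        }
      }
    }
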
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java
deleted file mode 100644
index 593031e..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode.column;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-
-/**
- * Position one of these appropriately in the data block and you can call its methods to retrieve
- * the family or qualifier at the current position.
- */
-@InterfaceAudience.Private
-public class ColumnReader {
-
- /****************** fields *************************/
-
- protected PrefixTreeBlockMeta blockMeta;
-
- protected byte[] columnBuffer;
- protected int columnOffset;
- protected int columnLength;
- protected boolean familyVsQualifier;
-
- protected ColumnNodeReader columnNodeReader;
-
-
- /******************** construct *******************/
-
- public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) {
- this.columnBuffer = columnBuffer;
- this.familyVsQualifier = familyVsQualifier;
- this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier);
- }
-
- public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
- this.blockMeta = blockMeta;
- clearColumnBuffer();
- columnNodeReader.initOnBlock(blockMeta, block);
- }
-
-
- /********************* methods *******************/
-
- public ColumnReader populateBuffer(int offsetIntoColumnData) {
- clearColumnBuffer();
- int nextRelativeOffset = offsetIntoColumnData;
- while (true) {
- int absoluteOffset;
- if (familyVsQualifier) {
- absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset;
- } else {
- absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset;
- }
- columnNodeReader.positionAt(absoluteOffset);
- columnOffset -= columnNodeReader.getTokenLength();
- columnLength += columnNodeReader.getTokenLength();
- columnNodeReader.prependTokenToBuffer(columnOffset);
- if (columnNodeReader.isRoot()) {
- return this;
- }
- nextRelativeOffset = columnNodeReader.getParentStartPosition();
- }
- }
-
- public byte[] copyBufferToNewArray() {// for testing
- byte[] out = new byte[columnLength];
- System.arraycopy(columnBuffer, columnOffset, out, 0, out.length);
- return out;
- }
-
- public int getColumnLength() {
- return columnLength;
- }
-
- public void clearColumnBuffer() {
- columnOffset = columnBuffer.length;
- columnLength = 0;
- }
-
-
- /****************************** get/set *************************************/
-
- public int getColumnOffset() {
- return columnOffset;
- }
-
-}
-
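
Editor's note: populateBuffer() in the ColumnReader above rebuilds a family or qualifier by starting at the referenced column node and following parent pointers toward the root, prepending each token into the tail end of a reusable buffer. A compact sketch of that leaf-to-root reconstruction over a hypothetical node array:

    /** Sketch: rebuild a column by prepending tokens while walking toward the root. */
    public class ColumnRebuildSketch {

      static class ColumnNode {
        final byte[] token;
        final int parent;      // index of the parent node, -1 at the root
        ColumnNode(byte[] token, int parent) { this.token = token; this.parent = parent; }
      }

      /** Fills the tail of the buffer and returns the offset where the column starts. */
      static int rebuild(ColumnNode[] nodes, int leafIndex, byte[] buffer) {
        int offset = buffer.length;            // the column grows leftwards
        for (int i = leafIndex; i != -1; i = nodes[i].parent) {
          byte[] token = nodes[i].token;
          offset -= token.length;
          System.arraycopy(token, 0, buffer, offset, token.length);
        }
        return offset;                         // column = buffer[offset .. buffer.length)
      }
    }
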
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java
deleted file mode 100644
index 1adc838..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode.row;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.util.vint.UFIntTool;
-import org.apache.hbase.util.vint.UVIntTool;
-
-/**
- * Position one of these appropriately in the data block and you can call its methods to retrieve
- * information necessary to decode the cells in the row.
- */
-@InterfaceAudience.Private
-public class RowNodeReader {
-
- /************* fields ***********************************/
-
- protected byte[] block;
- protected int offset;
- protected int fanIndex;
-
- protected int numCells;
-
- protected int tokenOffset;
- protected int tokenLength;
- protected int fanOffset;
- protected int fanOut;
-
- protected int familyOffsetsOffset;
- protected int qualifierOffsetsOffset;
- protected int timestampIndexesOffset;
- protected int mvccVersionIndexesOffset;
- protected int operationTypesOffset;
- protected int valueOffsetsOffset;
- protected int valueLengthsOffset;
- protected int nextNodeOffsetsOffset;
-
-
- /******************* construct **************************/
-
- public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) {
- this.block = block;
-
- this.offset = offset;
- resetFanIndex();
-
- this.tokenLength = UVIntTool.getInt(block, offset);
- this.tokenOffset = offset + UVIntTool.numBytes(tokenLength);
-
- this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength);
- this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut);
-
- this.numCells = UVIntTool.getInt(block, fanOffset + fanOut);
-
- this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
- this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
- this.timestampIndexesOffset = qualifierOffsetsOffset + numCells
- * blockMeta.getQualifierOffsetWidth();
- this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells
- * blockMeta.getTimestampIndexWidth();
- this.operationTypesOffset = mvccVersionIndexesOffset + numCells
- * blockMeta.getMvccVersionIndexWidth();
- this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth();
- this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth();
- this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth();
- }
-
-
- /******************** methods ****************************/
-
- public boolean isLeaf() {
- return fanOut == 0;
- }
-
- public boolean isNub() {
- return fanOut > 0 && numCells > 0;
- }
-
- public boolean isBranch() {
- return fanOut > 0 && numCells == 0;
- }
-
- public boolean hasOccurrences() {
- return numCells > 0;
- }
-
- public int getTokenArrayOffset(){
- return tokenOffset;
- }
-
- public int getTokenLength() {
- return tokenLength;
- }
-
- public byte getFanByte(int i) {
- return block[fanOffset + i];
- }
-
- /**
- * for debugging
- */
- protected String getFanByteReadable(int i){
- return Bytes.toStringBinary(block, fanOffset + i, 1);
- }
-
- public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getFamilyOffsetWidth();
- int startIndex = familyOffsetsOffset + fIntWidth * index;
- return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- }
-
- public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getQualifierOffsetWidth();
- int startIndex = qualifierOffsetsOffset + fIntWidth * index;
- return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- }
-
- public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getTimestampIndexWidth();
- int startIndex = timestampIndexesOffset + fIntWidth * index;
- return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- }
-
- public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getMvccVersionIndexWidth();
- int startIndex = mvccVersionIndexesOffset + fIntWidth * index;
- return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- }
-
- public int getType(int index, PrefixTreeBlockMeta blockMeta) {
- if (blockMeta.isAllSameType()) {
- return blockMeta.getAllTypes();
- }
- return block[operationTypesOffset + index];
- }
-
- public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getValueOffsetWidth();
- int startIndex = valueOffsetsOffset + fIntWidth * index;
- int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- return offset;
- }
-
- public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getValueLengthWidth();
- int startIndex = valueLengthsOffset + fIntWidth * index;
- int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- return length;
- }
-
- public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) {
- int fIntWidth = blockMeta.getNextNodeOffsetWidth();
- int startIndex = nextNodeOffsetsOffset + fIntWidth * index;
- return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
- }
-
- public String getBranchNubLeafIndicator() {
- if (isNub()) {
- return "N";
- }
- return isBranch() ? "B" : "L";
- }
-
- public boolean hasChildren() {
- return fanOut > 0;
- }
-
- public int getLastFanIndex() {
- return fanOut - 1;
- }
-
- public int getLastCellIndex() {
- return numCells - 1;
- }
-
- public int getNumCells() {
- return numCells;
- }
-
- public int getFanOut() {
- return fanOut;
- }
-
- public byte[] getToken() {
- // TODO pass in reusable ByteRange
- return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray();
- }
-
- public int getOffset() {
- return offset;
- }
-
- public int whichFanNode(byte searchForByte) {
- if( ! hasFan()){
- throw new IllegalStateException("This row node has no fan, so can't search it");
- }
- int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut,
- searchForByte);
- if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block
- return fanIndexInBlock - fanOffset;
- }
- return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse
- }
-
- public void resetFanIndex() {
- fanIndex = -1;// just the way the logic currently works
- }
-
- public int getFanIndex() {
- return fanIndex;
- }
-
- public void setFanIndex(int fanIndex) {
- this.fanIndex = fanIndex;
- }
-
- public boolean hasFan(){
- return fanOut > 0;
- }
-
- public boolean hasPreviousFanNodes() {
- return fanOut > 0 && fanIndex > 0;
- }
-
- public boolean hasMoreFanNodes() {
- return fanIndex < getLastFanIndex();
- }
-
- public boolean isOnLastFanNode() {
- return !hasMoreFanNodes();
- }
-
-
- /*************** standard methods **************************/
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut));
- sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength));
- sb.append(",numCells:" + numCells);
- sb.append(",fanIndex:"+fanIndex);
- if(fanIndex>=0){
- sb.append("("+getFanByteReadable(fanIndex)+")");
- }
- return sb.toString();
- }
-}
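
Editor's note: initOnBlock() in the row-node reader above derives where each per-cell section starts by adding numCells times that section's per-entry width, so the getters can later index straight into the block. The offset arithmetic in isolation, with hypothetical widths:

    /** Sketch of laying out consecutive fixed-width, per-cell sections in one block. */
    public class SectionOffsetsSketch {

      public static void main(String[] args) {
        int numCells = 5;
        // Per-entry byte widths for each section, in on-disk order (hypothetical values).
        int familyOffsetWidth = 1, qualifierOffsetWidth = 2, timestampIndexWidth = 1;

        int familyOffsetsOffset = 0;           // start of the first per-cell section
        int qualifierOffsetsOffset = familyOffsetsOffset + numCells * familyOffsetWidth;
        int timestampIndexesOffset = qualifierOffsetsOffset + numCells * qualifierOffsetWidth;
        int endOfSections = timestampIndexesOffset + numCells * timestampIndexWidth;

        // Entry i of a section lives at sectionOffset + i * width.
        int cellIndex = 3;
        int qualifierEntryOffset = qualifierOffsetsOffset + cellIndex * qualifierOffsetWidth;

        System.out.println("qualifier entry for cell 3 starts at byte " + qualifierEntryOffset
            + "; sections end at byte " + endOfSections);
      }
    }
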
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java
deleted file mode 100644
index 5a88fdf..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode.timestamp;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.util.vint.UFIntTool;
-
-/**
- * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block.
- */
-@InterfaceAudience.Private
-public class MvccVersionDecoder {
-
- protected PrefixTreeBlockMeta blockMeta;
- protected byte[] block;
-
-
- /************** construct ***********************/
-
- public MvccVersionDecoder() {
- }
-
- public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
- this.block = block;
- this.blockMeta = blockMeta;
- }
-
-
- /************** methods *************************/
-
- public long getMvccVersion(int index) {
- if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical
- return blockMeta.getMinMvccVersion();
- }
- int startIndex = blockMeta.getAbsoluteMvccVersionOffset()
- + blockMeta.getMvccVersionDeltaWidth() * index;
- long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth());
- return blockMeta.getMinMvccVersion() + delta;
- }
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java
deleted file mode 100644
index b3e122a..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.decode.timestamp;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.util.vint.UFIntTool;
-
-/**
- * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block.
- */
-@InterfaceAudience.Private
-public class TimestampDecoder {
-
- protected PrefixTreeBlockMeta blockMeta;
- protected byte[] block;
-
-
- /************** construct ***********************/
-
- public TimestampDecoder() {
- }
-
- public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
- this.block = block;
- this.blockMeta = blockMeta;
- }
-
-
- /************** methods *************************/
-
- public long getLong(int index) {
- if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical
- return blockMeta.getMinTimestamp();
- }
- int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth()
- * index;
- long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth());
- return blockMeta.getMinTimestamp() + delta;
- }
-}
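
Editor's note: both decoders (timestamps above, mvcc versions in the previous hunk) store a per-block minimum plus a fixed-width delta per cell, with width zero meaning every cell in the block shared the same value. A self-contained sketch of that decode, assuming a big-endian fixed-width read (UFIntTool's real byte order isn't shown here):

    /** Sketch: value[i] = blockMinimum + fixed-width delta read from the block. */
    public class DeltaDecodeSketch {

      static long decode(byte[] block, int deltasOffset, int deltaWidth,
          long blockMinimum, int index) {
        if (deltaWidth == 0) {
          return blockMinimum;                 // all values in the block were identical
        }
        return blockMinimum
            + readUFixedInt(block, deltasOffset + deltaWidth * index, deltaWidth);
      }

      /** Big-endian unsigned read of {@code width} bytes; assumes width <= 8. */
      static long readUFixedInt(byte[] block, int offset, int width) {
        long value = 0;
        for (int i = 0; i < width; ++i) {
          value = (value << 8) | (block[offset + i] & 0xffL);
        }
        return value;
      }
    }
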
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java
deleted file mode 100644
index b26607f..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode;
-
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the
- * ones retrieved from the pool for usage.
- */
-@InterfaceAudience.Private
-public class EncoderFactory {
-
- private static final EncoderPool POOL = new ThreadLocalEncoderPool();
-
-
- public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) {
- return POOL.checkOut(outputStream, includeMvccVersion);
- }
-
- public static void checkIn(PrefixTreeEncoder encoder) {
- POOL.checkIn(encoder);
- }
-
-
- /**************************** helper ******************************/
-
- protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder,
- OutputStream outputStream, boolean includeMvccVersion) {
- PrefixTreeEncoder ret = encoder;
- if (encoder == null) {
- ret = new PrefixTreeEncoder(outputStream, includeMvccVersion);
- }
- ret.reset(outputStream, includeMvccVersion);
- return ret;
- }
-
-}
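
The factory wraps a checkOut/checkIn lifecycle around PrefixTreeEncoder instances so a single encoder can be reused per thread. A hedged usage sketch, assuming it lives alongside the encode package and that cells is a caller-supplied, already sorted Iterable of Cells:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.hbase.Cell;

    public class EncoderFactoryUsageSketch {
      // cells must already be in sorted order; the encoder copies each one as it arrives.
      static byte[] encodeBlock(Iterable<Cell> cells, boolean includeMvccVersion) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrefixTreeEncoder encoder = EncoderFactory.checkOut(baos, includeMvccVersion);
        try {
          for (Cell cell : cells) {
            encoder.write(cell);             // accumulate row/family/qualifier/timestamp/value parts
          }
          encoder.flush();                   // compile the tries and write the block bytes
        } finally {
          EncoderFactory.checkIn(encoder);   // return the encoder to the thread-local pool
        }
        return baos.toByteArray();
      }
    }
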
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java
deleted file mode 100644
index ca73f91..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode;
-
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-
-@InterfaceAudience.Private
-public interface EncoderPool {
-
- PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion);
- void checkIn(PrefixTreeEncoder encoder);
-
-}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
deleted file mode 100644
index 46cb707..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.KeyValueUtil;
-import org.apache.hadoop.hbase.io.CellOutputStream;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.io.WritableUtils;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
-import org.apache.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
-import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder;
-import org.apache.hbase.codec.prefixtree.encode.row.RowSectionWriter;
-import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
-import org.apache.hbase.util.byterange.ByteRangeSet;
-import org.apache.hbase.util.byterange.impl.ByteRangeHashSet;
-import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet;
-import org.apache.hbase.util.vint.UFIntTool;
-
-/**
- * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
- * added they are completely copied into the various encoding structures. This is important because
- * usually the cells being fed in during compactions will be transient.
- *
- * Usage:
- * 1) constructor
- * 2) append cells in sorted order: write(Cell cell)
- * 3) flush()
- */
-@InterfaceAudience.Private
-public class PrefixTreeEncoder implements CellOutputStream {
-
- /**************** static ************************/
-
- protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class);
-
- //future-proof where HBase supports multiple families in a data block.
- public static final boolean MULITPLE_FAMILIES_POSSIBLE = false;
-
- private static final boolean USE_HASH_COLUMN_SORTER = true;
- private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256;
- private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024;
-
-
- /**************** fields *************************/
-
- protected long numResets = 0L;
-
- protected OutputStream outputStream;
-
- /*
- * Cannot change during a single block's encoding. If false, then substitute incoming Cell's
- * mvccVersion with zero and write out the block as usual.
- */
- protected boolean includeMvccVersion;
-
- /*
- * reusable ByteRanges used for communicating with the sorters/compilers
- */
- protected ByteRange rowRange;
- protected ByteRange familyRange;
- protected ByteRange qualifierRange;
-
- /*
- * incoming Cell fields are copied into these arrays
- */
- protected long[] timestamps;
- protected long[] mvccVersions;
- protected byte[] typeBytes;
- protected int[] valueOffsets;
- protected byte[] values;
-
- protected PrefixTreeBlockMeta blockMeta;
-
- /*
- * Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and
- * compile before flushing.
- */
- protected LongEncoder timestampEncoder;
- protected LongEncoder mvccVersionEncoder;
- protected CellTypeEncoder cellTypeEncoder;
-
- /*
- * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them
- * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by
- * comparing only with the previous row key, families and qualifiers can arrive in unsorted order
- * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them.
- */
- protected ByteRangeSet familyDeduplicator;
- protected ByteRangeSet qualifierDeduplicator;
-
- /*
- * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
- * trie structure with nodes connected by memory pointers (not serializable yet).
- */
- protected Tokenizer rowTokenizer;
- protected Tokenizer familyTokenizer;
- protected Tokenizer qualifierTokenizer;
-
- /*
- * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
- * all information to an output stream of bytes that can be stored on disk.
- */
- protected RowSectionWriter rowWriter;
- protected ColumnSectionWriter familyWriter;
- protected ColumnSectionWriter qualifierWriter;
-
- /*
- * Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
- * were full KeyValues because some parts of HBase like to know the "unencoded size".
- */
- protected int totalCells = 0;
- protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
- protected int totalValueBytes = 0;
- protected int maxValueLength = 0;
- protected int totalBytes = 0;//
-
-
- /***************** construct ***********************/
-
- public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) {
- // used during cell accumulation
- this.blockMeta = new PrefixTreeBlockMeta();
- this.rowRange = new ByteRange();
- this.familyRange = new ByteRange();
- this.qualifierRange = new ByteRange();
- this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES];
- this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES];
- this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES];
- this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES];
- this.values = new byte[VALUE_BUFFER_INIT_SIZE];
-
- // used during compilation
- this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
- : new ByteRangeTreeSet();
- this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
- : new ByteRangeTreeSet();
- this.timestampEncoder = new LongEncoder();
- this.mvccVersionEncoder = new LongEncoder();
- this.cellTypeEncoder = new CellTypeEncoder();
- this.rowTokenizer = new Tokenizer();
- this.familyTokenizer = new Tokenizer();
- this.qualifierTokenizer = new Tokenizer();
- this.rowWriter = new RowSectionWriter();
- this.familyWriter = new ColumnSectionWriter();
- this.qualifierWriter = new ColumnSectionWriter();
-
- reset(outputStream, includeMvccVersion);
- }
-
- public void reset(OutputStream outputStream, boolean includeMvccVersion) {
- ++numResets;
- this.includeMvccVersion = includeMvccVersion;
- this.outputStream = outputStream;
- valueOffsets[0] = 0;
-
- familyDeduplicator.reset();
- qualifierDeduplicator.reset();
- rowTokenizer.reset();
- timestampEncoder.reset();
- mvccVersionEncoder.reset();
- cellTypeEncoder.reset();
- familyTokenizer.reset();
- qualifierTokenizer.reset();
- rowWriter.reset();
- familyWriter.reset();
- qualifierWriter.reset();
-
- totalCells = 0;
- totalUnencodedBytes = 0;
- totalValueBytes = 0;
- maxValueLength = 0;
- totalBytes = 0;
- }
-
- /**
- * Check that the arrays used to hold cell fragments are large enough for the cell that is being
- * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
- * first few block encodings but should stabilize quickly.
- */
- protected void ensurePerCellCapacities() {
- int currentCapacity = valueOffsets.length;
- int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe
- if (neededCapacity < currentCapacity) {
- return;
- }
-
- int padding = neededCapacity;//this will double the array size
- timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding);
- mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding);
- typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding);
- valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding);
- }
-
- /******************** CellOutputStream methods *************************/
-
- /**
- * Note: Unused until support is added to the scanner/heap
- *
- * The following methods are optimized versions of write(Cell cell). The result should be
- * identical, however the implementation may be able to execute them much more efficiently because
- * it does not need to compare the unchanged fields with the previous cell's.
- *
- * Consider the benefits during compaction when paired with a CellScanner that is also aware of
- * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells
- * to the write(Cell cell) method.
- *
- * The savings of skipping duplicate row detection are significant with long row keys. A
- * DataBlockEncoder may store a row key once in combination with a count of how many cells are in
- * the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment
- * of the counter, and that is for every cell in the row.
- */
-
- /**
- * Add a Cell to the output stream but repeat the previous row.
- */
- //@Override
- public void writeWithRepeatRow(Cell cell) {
- ensurePerCellCapacities();//can we optimize away some of this?
-
- //save a relatively expensive row comparison, incrementing the row's counter instead
- rowTokenizer.incrementNumOccurrencesOfLatestValue();
- addFamilyPart(cell);
- addQualifierPart(cell);
- addAfterRowFamilyQualifier(cell);
- }
-
-
- @Override
- public void write(Cell cell) {
- ensurePerCellCapacities();
-
- rowTokenizer.addSorted(CellUtil.fillRowRange(cell, rowRange));
- addFamilyPart(cell);
- addQualifierPart(cell);
- addAfterRowFamilyQualifier(cell);
- }
-
-
- /***************** internal add methods ************************/
-
- private void addAfterRowFamilyQualifier(Cell cell){
- // timestamps
- timestamps[totalCells] = cell.getTimestamp();
- timestampEncoder.add(cell.getTimestamp());
-
- // memstore timestamps
- if (includeMvccVersion) {
- mvccVersions[totalCells] = cell.getMvccVersion();
- mvccVersionEncoder.add(cell.getMvccVersion());
- totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion());
- }else{
- //must overwrite in case there was a previous version in this array slot
- mvccVersions[totalCells] = 0L;
- if(totalCells == 0){//only need to do this for the first cell added
- mvccVersionEncoder.add(0L);
- }
- //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled
- }
-
- // types
- typeBytes[totalCells] = cell.getTypeByte();
- cellTypeEncoder.add(cell.getTypeByte());
-
- // values
- totalValueBytes += cell.getValueLength();
- // double the array each time we run out of space
- values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes);
- CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]);
- if (cell.getValueLength() > maxValueLength) {
- maxValueLength = cell.getValueLength();
- }
- valueOffsets[totalCells + 1] = totalValueBytes;
-
- // general
- totalUnencodedBytes += KeyValueUtil.length(cell);
- ++totalCells;
- }
-
- private void addFamilyPart(Cell cell) {
- if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) {
- CellUtil.fillFamilyRange(cell, familyRange);
- familyDeduplicator.add(familyRange);
- }
- }
-
- private void addQualifierPart(Cell cell) {
- CellUtil.fillQualifierRange(cell, qualifierRange);
- qualifierDeduplicator.add(qualifierRange);
- }
-
-
- /****************** compiling/flushing ********************/
-
- /**
- * Expensive method. The second half of the encoding work happens here.
- *
- * Take all the separate accumulated data structures and turn them into a single stream of bytes
- * which is written to the outputStream.
- */
- @Override
- public void flush() throws IOException {
- compile();
-
- // do the actual flushing to the output stream. Order matters.
- blockMeta.writeVariableBytesToOutputStream(outputStream);
- rowWriter.writeBytes(outputStream);
- familyWriter.writeBytes(outputStream);
- qualifierWriter.writeBytes(outputStream);
- timestampEncoder.writeBytes(outputStream);
- mvccVersionEncoder.writeBytes(outputStream);
- //CellType bytes are in the row nodes. there is no additional type section
- outputStream.write(values, 0, totalValueBytes);
- }
-
- /**
- * Now that all the cells have been added, do the work to reduce them to a series of byte[]
- * fragments that are ready to be written to the output stream.
- */
- protected void compile(){
- blockMeta.setNumKeyValueBytes(totalUnencodedBytes);
- int lastValueOffset = valueOffsets[totalCells];
- blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
- blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
- blockMeta.setNumValueBytes(totalValueBytes);
- totalBytes += totalValueBytes;
-
- //these compile methods will add to totalBytes
- compileTypes();
- compileMvccVersions();
- compileTimestamps();
- compileQualifiers();
- compileFamilies();
- compileRows();
-
- int numMetaBytes = blockMeta.calculateNumMetaBytes();
- blockMeta.setNumMetaBytes(numMetaBytes);
- totalBytes += numMetaBytes;
- }
-
- /**
- * The following "compile" methods do any intermediate work necessary to transform the cell
- * fragments collected during the writing phase into structures that are ready to write to the
- * outputStream.
- *
- * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion.
- */
-
- protected void compileTypes() {
- blockMeta.setAllSameType(cellTypeEncoder.areAllSameType());
- if(cellTypeEncoder.areAllSameType()){
- blockMeta.setAllTypes(cellTypeEncoder.getOnlyType());
- }
- }
-
- protected void compileMvccVersions() {
- mvccVersionEncoder.compile();
- blockMeta.setMvccVersionFields(mvccVersionEncoder);
- int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength();
- totalBytes += numMvccVersionBytes;
- }
-
- protected void compileTimestamps() {
- timestampEncoder.compile();
- blockMeta.setTimestampFields(timestampEncoder);
- int numTimestampBytes = timestampEncoder.getOutputArrayLength();
- totalBytes += numTimestampBytes;
- }
-
- protected void compileQualifiers() {
- blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
- qualifierDeduplicator.compile();
- qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
- qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false);
- qualifierWriter.compile();
- int numQualifierBytes = qualifierWriter.getNumBytes();
- blockMeta.setNumQualifierBytes(numQualifierBytes);
- totalBytes += numQualifierBytes;
- }
-
- protected void compileFamilies() {
- blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
- familyDeduplicator.compile();
- familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
- familyWriter.reconstruct(blockMeta, familyTokenizer, true);
- familyWriter.compile();
- int numFamilyBytes = familyWriter.getNumBytes();
- blockMeta.setNumFamilyBytes(numFamilyBytes);
- totalBytes += numFamilyBytes;
- }
-
- protected void compileRows() {
- rowWriter.reconstruct(this);
- rowWriter.compile();
- int numRowBytes = rowWriter.getNumBytes();
- blockMeta.setNumRowBytes(numRowBytes);
- blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth());
- totalBytes += numRowBytes;
- }
-
- /********************* convenience getters ********************************/
-
- public long getValueOffset(int index) {
- return valueOffsets[index];
- }
-
- public int getValueLength(int index) {
- return (int) (valueOffsets[index + 1] - valueOffsets[index]);
- }
-
- /************************* get/set *************************************/
-
- public PrefixTreeBlockMeta getBlockMeta() {
- return blockMeta;
- }
-
- public Tokenizer getRowTokenizer() {
- return rowTokenizer;
- }
-
- public LongEncoder getTimestampEncoder() {
- return timestampEncoder;
- }
-
- public int getTotalBytes() {
- return totalBytes;
- }
-
- public long[] getTimestamps() {
- return timestamps;
- }
-
- public long[] getMvccVersions() {
- return mvccVersions;
- }
-
- public byte[] getTypeBytes() {
- return typeBytes;
- }
-
- public LongEncoder getMvccVersionEncoder() {
- return mvccVersionEncoder;
- }
-
- public ByteRangeSet getFamilySorter() {
- return familyDeduplicator;
- }
-
- public ByteRangeSet getQualifierSorter() {
- return qualifierDeduplicator;
- }
-
- public ColumnSectionWriter getFamilyWriter() {
- return familyWriter;
- }
-
- public ColumnSectionWriter getQualifierWriter() {
- return qualifierWriter;
- }
-
- public RowSectionWriter getRowWriter() {
- return rowWriter;
- }
-
- public ByteRange getValueByteRange() {
- return new ByteRange(values, 0, totalValueBytes);
- }
-
-}
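
One detail of the encoder worth calling out: values are appended back-to-back into the values array, and valueOffsets is written one slot ahead (valueOffsets[totalCells + 1] = totalValueBytes), so a value's length is simply the difference between adjacent offsets, as getValueLength() shows. A tiny illustration of that bookkeeping with hypothetical values:

    public class ValueOffsetsIllustration {
      public static void main(String[] args) {
        byte[][] cellValues = { "a".getBytes(), "bbb".getBytes(), "cc".getBytes() };
        int[] valueOffsets = new int[cellValues.length + 1];     // one slot ahead, offset 0 stays 0
        int totalValueBytes = 0;
        for (int i = 0; i < cellValues.length; ++i) {
          totalValueBytes += cellValues[i].length;
          valueOffsets[i + 1] = totalValueBytes;                 // mirrors addAfterRowFamilyQualifier()
        }
        int lengthOfSecondValue = valueOffsets[2] - valueOffsets[1];  // 3, same math as getValueLength(1)
        System.out.println(lengthOfSecondValue);
      }
    }
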
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java
deleted file mode 100644
index 3f9a00b..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode;
-
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-
-/**
- * Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and
- * would be more garbage than the data in the block. A new encoder is needed for each block in
- * a flush, compaction, RPC response, etc.
- *
- * It is not a pool in the traditional sense, but implements the semantics of a traditional pool
- * via ThreadLocals to avoid sharing between threads. Sharing between threads would not be
- * very expensive given that it's accessed per-block, but this is just as easy.
- *
- * This pool implementation assumes there is a one-to-one mapping between a single thread and a
- * single flush or compaction.
- */
-@InterfaceAudience.Private
-public class ThreadLocalEncoderPool implements EncoderPool{
-
- private static final ThreadLocal<PrefixTreeEncoder> ENCODER
- = new ThreadLocal<PrefixTreeEncoder>();
-
- /**
- * Get the encoder attached to the current ThreadLocal, or create a new one and attach it to the
- * current thread.
- */
- @Override
- public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) {
- PrefixTreeEncoder builder = ENCODER.get();
- builder = EncoderFactory.prepareEncoder(builder, os, includeMvccVersion);
- ENCODER.set(builder);
- return builder;
- }
-
- @Override
- public void checkIn(PrefixTreeEncoder encoder) {
- // attached to thread on checkOut, so shouldn't need to do anything here
-
- // do we need to worry about detaching encoders from compaction threads or are the same threads
- // used over and over
- }
-
-}
\ No newline at end of file
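
The pool above is really the "one reusable object per thread" pattern: fetch the thread's instance, create it on first use, and always re-prepare it before handing it out. A generic sketch of the same pattern with a stand-in reusable object, not HBase API:

    public class ThreadLocalReusePoolSketch {
      // Stand-in for PrefixTreeEncoder: something expensive to allocate but cheap to reset.
      static class ReusableBuffer {
        final StringBuilder data = new StringBuilder();
        void reset() { data.setLength(0); }
      }

      private static final ThreadLocal<ReusableBuffer> POOL = new ThreadLocal<ReusableBuffer>();

      static ReusableBuffer checkOut() {
        ReusableBuffer buffer = POOL.get();
        if (buffer == null) {
          buffer = new ReusableBuffer();        // first use on this thread
          POOL.set(buffer);
        }
        buffer.reset();                         // always re-prepare, like prepareEncoder()
        return buffer;
      }

      static void checkIn(ReusableBuffer buffer) {
        // nothing to do: the instance stays attached to the owning thread
      }
    }
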
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
deleted file mode 100644
index b84e15a..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.column;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Strings;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hbase.util.vint.UFIntTool;
-import org.apache.hbase.util.vint.UVIntTool;
-
-/**
- * Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly.
- * The family and qualifier sections of the data block are made of 1 or more of these nodes.
- *
- * Each node is composed of 3 sections:
- *
- * <li>tokenLength: UVInt (normally 1 byte) indicating the number of token bytes
- * <li>token[]: the actual token bytes
- * <li>parentStartPosition: the offset of the next node from the start of the family or qualifier
- * section
- */
-@InterfaceAudience.Private
-public class ColumnNodeWriter{
-
- /************* fields ****************************/
-
- protected TokenizerNode builderNode;
- protected PrefixTreeBlockMeta blockMeta;
-
- protected boolean familyVsQualifier;
-
- protected int tokenLength;
- protected byte[] token;
- protected int parentStartPosition;
-
-
- /*************** construct **************************/
-
- public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
- boolean familyVsQualifier) {
- this.blockMeta = blockMeta;
- this.builderNode = builderNode;
- this.familyVsQualifier = familyVsQualifier;
- calculateTokenLength();
- }
-
-
- /************* methods *******************************/
-
- public boolean isRoot() {
- return parentStartPosition == 0;
- }
-
- private void calculateTokenLength() {
- tokenLength = builderNode.getTokenLength();
- token = new byte[tokenLength];
- }
-
- /**
- * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a
- * placeholder.
- * @param offsetWidthPlaceholder the placeholder
- * @return node width
- */
- public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) {
- int width = 0;
- width += UVIntTool.numBytes(tokenLength);
- width += token.length;
- width += offsetWidthPlaceholder;
- return width;
- }
-
- public void writeBytes(OutputStream os) throws IOException {
- int parentOffsetWidth;
- if (familyVsQualifier) {
- parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
- } else {
- parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
- }
- UVIntTool.writeBytes(tokenLength, os);
- os.write(token);
- UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os);
- }
-
- public void setTokenBytes(ByteRange source) {
- source.deepCopySubRangeTo(0, tokenLength, token, 0);
- }
-
-
- /****************** standard methods ************************/
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ",");
- sb.append("[");
- sb.append(Bytes.toString(token));
- sb.append("]->");
- sb.append(parentStartPosition);
- return sb.toString();
- }
-
-
- /************************** get/set ***********************/
-
- public void setParentStartPosition(int parentStartPosition) {
- this.parentStartPosition = parentStartPosition;
- }
-
-}
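
Putting the three sections of a column node together, its width is UVIntTool.numBytes(tokenLength) + tokenLength + the parent-offset width chosen for the block, which is exactly what getWidthUsingPlaceholderForOffsetWidth() adds up. A worked example, with a stand-in for UVIntTool.numBytes() that assumes the usual 7-bits-per-byte varint:

    public class ColumnNodeWidthExample {
      // Stand-in for UVIntTool.numBytes(): 7 value bits per byte is assumed here.
      static int uvintNumBytes(int value) {
        int bytes = 1;
        while (value >= 128) {
          value >>>= 7;
          ++bytes;
        }
        return bytes;
      }

      public static void main(String[] args) {
        int tokenLength = 4;              // e.g. a 4-byte token such as "qual"
        int parentOffsetWidth = 1;        // blockMeta.getQualifierOffsetWidth()
        int nodeWidth = uvintNumBytes(tokenLength) + tokenLength + parentOffsetWidth;
        System.out.println(nodeWidth);    // 6 = <tokenLength UVInt><token bytes><parent offset UFInt>
      }
    }
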
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
deleted file mode 100644
index 3d2457d..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.column;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
-import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hbase.util.vint.UFIntTool;
-
-import com.google.common.collect.Lists;
-
-/**
- * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family
- * section is written after the row section, and qualifier section after family section.
- *
- * The family and qualifier tries, or "column tries", are structured differently than the row trie.
- * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and
- * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a
- * row, the row trie contains a list of offsets into the column sections (along with pointers to
- * timestamps and other per-cell fields). These offsets point to the last column node/token that
- * comprises the column name. To assemble the column name, the trie is traversed in reverse (right
- * to left), with the rightmost tokens pointing to the start of their "parent" node which is the
- * node to the left.
- *
- * This choice was made to reduce the size of the column trie by storing the minimum amount of
- * offset data. As a result, to find a specific qualifier within a row, you must do a binary search
- * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might
- * encode the columns in both a forward and reverse trie, which would convert binary searches into
- * more efficient trie searches which would be beneficial for wide rows.
- */
-@InterfaceAudience.Private
-public class ColumnSectionWriter {
-
- public static final int EXPECTED_NUBS_PLUS_LEAVES = 100;
-
- /****************** fields ****************************/
-
- private PrefixTreeBlockMeta blockMeta;
-
- private boolean familyVsQualifier;
- private Tokenizer tokenizer;
- private int numBytes = 0;
- private ArrayList<TokenizerNode> nonLeaves;
- private ArrayList<TokenizerNode> leaves;
- private ArrayList<TokenizerNode> allNodes;
- private ArrayList<ColumnNodeWriter> columnNodeWriters;
- private List<Integer> outputArrayOffsets;
-
-
- /*********************** construct *********************/
-
- public ColumnSectionWriter() {
- this.nonLeaves = Lists.newArrayList();
- this.leaves = Lists.newArrayList();
- this.outputArrayOffsets = Lists.newArrayList();
- }
-
- public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
- boolean familyVsQualifier) {
- this();// init collections
- reconstruct(blockMeta, builder, familyVsQualifier);
- }
-
- public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
- boolean familyVsQualifier) {
- this.blockMeta = blockMeta;
- this.tokenizer = builder;
- this.familyVsQualifier = familyVsQualifier;
- }
-
- public void reset() {
- numBytes = 0;
- nonLeaves.clear();
- leaves.clear();
- outputArrayOffsets.clear();
- }
-
-
- /****************** methods *******************************/
-
- public ColumnSectionWriter compile() {
- if (familyVsQualifier) {
- // do nothing. max family length fixed at Byte.MAX_VALUE
- } else {
- blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
- }
-
- tokenizer.setNodeFirstInsertionIndexes();
-
- tokenizer.appendNodes(nonLeaves, true, false);
-
- tokenizer.appendNodes(leaves, false, true);
-
- allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size());
- allNodes.addAll(nonLeaves);
- allNodes.addAll(leaves);
-
- columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
- for (int i = 0; i < allNodes.size(); ++i) {
- TokenizerNode node = allNodes.get(i);
- columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier));
- }
-
- // leaf widths are known at this point, so add them up
- int totalBytesWithoutOffsets = 0;
- for (int i = allNodes.size() - 1; i >= 0; --i) {
- ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
- // leaves store all but their first token byte
- totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0);
- }
-
- // figure out how wide our offset FInts are
- int parentOffsetWidth = 0;
- while (true) {
- ++parentOffsetWidth;
- int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size();
- if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) {
- numBytes = numBytesFinder;
- break;
- }// it fits
- }
- if (familyVsQualifier) {
- blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
- } else {
- blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
- }
-
- int forwardIndex = 0;
- for (int i = 0; i < allNodes.size(); ++i) {
- TokenizerNode node = allNodes.get(i);
- ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i);
- int fullNodeWidth = columnNodeWriter
- .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth);
- node.setOutputArrayOffset(forwardIndex);
- columnNodeWriter.setTokenBytes(node.getToken());
- if (node.isRoot()) {
- columnNodeWriter.setParentStartPosition(0);
- } else {
- columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset());
- }
- forwardIndex += fullNodeWidth;
- }
-
- tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
-
- return this;
- }
-
- public void writeBytes(OutputStream os) throws IOException {
- for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) {
- columnNodeWriter.writeBytes(os);
- }
- }
-
-
- /************* get/set **************************/
-
- public ArrayList<ColumnNodeWriter> getColumnNodeWriters() {
- return columnNodeWriters;
- }
-
- public int getNumBytes() {
- return numBytes;
- }
-
- public int getOutputArrayOffset(int sortedIndex) {
- return outputArrayOffsets.get(sortedIndex);
- }
-
- public ArrayList<TokenizerNode> getNonLeaves() {
- return nonLeaves;
- }
-
- public ArrayList<TokenizerNode> getLeaves() {
- return leaves;
- }
-
-}
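
To make the reverse traversal described in the class comment concrete, here is a hedged decode sketch: start at the column-node offset stored in the row trie, prepend each node's token, and follow parentStartPosition until the node at offset 0 (the root) has been consumed. The varint and fixed-int readers are stand-ins for UVIntTool/UFIntTool, whose byte layouts are assumptions here:

    import java.io.ByteArrayOutputStream;
    import java.util.ArrayDeque;
    import java.util.Arrays;
    import java.util.Deque;

    public class ColumnReassemblySketch {
      // Assumed varint: low 7 bits per byte, high bit set on all but the last byte.
      static int readUVInt(byte[] b, int offset) {
        int value = 0, shift = 0, i = offset;
        while ((b[i] & 0x80) != 0) {
          value |= (b[i++] & 0x7F) << shift;
          shift += 7;
        }
        return value | ((b[i] & 0x7F) << shift);
      }

      static int uvintWidth(byte[] b, int offset) {
        int width = 1;
        while ((b[offset + width - 1] & 0x80) != 0) {
          ++width;
        }
        return width;
      }

      // Assumed big-endian unsigned fixed-width int.
      static int readUFInt(byte[] b, int offset, int width) {
        int value = 0;
        for (int i = 0; i < width; ++i) {
          value = (value << 8) | (b[offset + i] & 0xFF);
        }
        return value;
      }

      static byte[] reassemble(byte[] section, int lastNodeOffset, int parentOffsetWidth) {
        Deque<byte[]> tokens = new ArrayDeque<byte[]>();
        int offset = lastNodeOffset;
        while (true) {
          int pos = offset;
          int tokenLength = readUVInt(section, pos);
          pos += uvintWidth(section, pos);
          tokens.addFirst(Arrays.copyOfRange(section, pos, pos + tokenLength));  // prepend: leaf to root
          pos += tokenLength;
          int parentOffset = readUFInt(section, pos, parentOffsetWidth);
          if (offset == 0) {
            break;                        // the root has been consumed
          }
          offset = parentOffset;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (byte[] token : tokens) {
          out.write(token, 0, token.length);
        }
        return out.toByteArray();
      }
    }
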
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
deleted file mode 100644
index 963c307..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.other;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each
- * KV. If all cells share the same type when converting to byte[], then we can store the "onlyType" in
- * blockMeta instead of repeating it for each cell, saving 1 byte per cell.
- */
-@InterfaceAudience.Private
-public class CellTypeEncoder {
-
- /************* fields *********************/
-
- protected boolean pendingFirstType = true;
- protected boolean allSameType = true;
- protected byte onlyType;
-
-
- /************* construct *********************/
-
- public void reset() {
- pendingFirstType = true;
- allSameType = true;
- }
-
-
- /************* methods *************************/
-
- public void add(byte type) {
- if (pendingFirstType) {
- onlyType = type;
- pendingFirstType = false;
- } else if (onlyType != type) {
- allSameType = false;
- }
- }
-
-
- /**************** get/set **************************/
-
- public boolean areAllSameType() {
- return allSameType;
- }
-
- public byte getOnlyType() {
- return onlyType;
- }
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java
deleted file mode 100644
index baf20f6..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.other;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.HashSet;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hbase.util.vint.UFIntTool;
-
-import com.google.common.base.Joiner;
-
-/**
- * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a
- * collection of Cells.
- *
- * 1. add longs to a HashSet for fast de-duplication
- * 2. keep track of the min and max
- * 3. copy all values to a new long[]
- * 4. Arrays.sort the long[]
- * 5. calculate maxDelta = max - min
- * 6. determine FInt width based on maxDelta
- * 7. PrefixTreeEncoder binary searches to find index of each value
- */
-@InterfaceAudience.Private
-public class LongEncoder {
-
- /****************** fields ****************************/
-
- protected HashSet<Long> uniqueValues;
- protected long[] sortedUniqueValues;
- protected long min, max, maxDelta;
-
- protected int bytesPerDelta;
- protected int bytesPerIndex;
- protected int totalCompressedBytes;
-
-
- /****************** construct ****************************/
-
- public LongEncoder() {
- this.uniqueValues = new HashSet<Long>();
- }
-
- public void reset() {
- uniqueValues.clear();
- sortedUniqueValues = null;
- min = Long.MAX_VALUE;
- max = Long.MIN_VALUE;
- maxDelta = Long.MIN_VALUE;
- bytesPerIndex = 0;
- bytesPerDelta = 0;
- totalCompressedBytes = 0;
- }
-
-
- /************* methods ***************************/
-
- public void add(long timestamp) {
- uniqueValues.add(timestamp);
- }
-
- public LongEncoder compile() {
- int numUnique = uniqueValues.size();
- if (numUnique == 1) {
- min = CollectionUtils.getFirst(uniqueValues);
- sortedUniqueValues = new long[] { min };
- return this;
- }
-
- sortedUniqueValues = new long[numUnique];
- int lastIndex = -1;
- for (long value : uniqueValues) {
- sortedUniqueValues[++lastIndex] = value;
- }
- Arrays.sort(sortedUniqueValues);
- min = ArrayUtils.getFirst(sortedUniqueValues);
- max = ArrayUtils.getLast(sortedUniqueValues);
- maxDelta = max - min;
- if (maxDelta > 0) {
- bytesPerDelta = UFIntTool.numBytes(maxDelta);
- } else {
- bytesPerDelta = 0;
- }
-
- int maxIndex = numUnique - 1;
- bytesPerIndex = UFIntTool.numBytes(maxIndex);
-
- totalCompressedBytes = numUnique * bytesPerDelta;
-
- return this;
- }
-
- public long getDelta(int index) {
- if (sortedUniqueValues.length == 0) {
- return 0;
- }
- return sortedUniqueValues[index] - min;
- }
-
- public int getIndex(long value) {
- // should always find an exact match
- return Arrays.binarySearch(sortedUniqueValues, value);
- }
-
- public void writeBytes(OutputStream os) throws IOException {
- for (int i = 0; i < sortedUniqueValues.length; ++i) {
- long delta = sortedUniqueValues[i] - min;
- UFIntTool.writeBytes(bytesPerDelta, delta, os);
- }
- }
-
- //convenience method for tests
- public byte[] getByteArray() throws IOException{
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- writeBytes(baos);
- return baos.toByteArray();
- }
-
- public int getOutputArrayLength() {
- return sortedUniqueValues.length * bytesPerDelta;
- }
-
- public int getNumUniqueValues() {
- return sortedUniqueValues.length;
- }
-
-
- /******************* Object methods **********************/
-
- @Override
- public String toString() {
- if (ArrayUtils.isEmpty(sortedUniqueValues)) {
- return "[]";
- }
- return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]";
- }
-
-
- /******************** get/set **************************/
-
- public long getMin() {
- return min;
- }
-
- public int getBytesPerDelta() {
- return bytesPerDelta;
- }
-
- public int getBytesPerIndex() {
- return bytesPerIndex;
- }
-
- public int getTotalCompressedBytes() {
- return totalCompressedBytes;
- }
-
- public long[] getSortedUniqueTimestamps() {
- return sortedUniqueValues;
- }
-
-}
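
A worked example of the compile() bookkeeping above for the timestamps {1000, 1003, 1003, 1500}: after de-duplication and sorting there are three unique values, the deltas range up to 500, and so each delta needs two bytes. numBytes() below is a stand-in mirroring what UFIntTool.numBytes() is expected to return:

    public class LongEncoderMathExample {
      // Smallest fixed width (in bytes) that can hold the given unsigned value.
      static int numBytes(long value) {
        int bytes = 1;
        while ((value >>>= 8) != 0) {
          ++bytes;
        }
        return bytes;
      }

      public static void main(String[] args) {
        long[] sortedUniqueValues = { 1000L, 1003L, 1500L };                    // after de-dup + sort
        long min = sortedUniqueValues[0];
        long max = sortedUniqueValues[sortedUniqueValues.length - 1];
        long maxDelta = max - min;                                              // 500
        int bytesPerDelta = maxDelta > 0 ? numBytes(maxDelta) : 0;              // 2, since 500 > 255
        int bytesPerIndex = numBytes(sortedUniqueValues.length - 1);            // 1
        int totalCompressedBytes = sortedUniqueValues.length * bytesPerDelta;   // 6
        System.out.println(bytesPerDelta + " " + bytesPerIndex + " " + totalCompressedBytes);
      }
    }
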
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
deleted file mode 100644
index 748a7f6..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.row;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ByteRangeTool;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
-import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hbase.util.vint.UFIntTool;
-import org.apache.hbase.util.vint.UVIntTool;
-
-/**
- * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf.
- * Please see the write() method for the order in which data is written.
- */
-@InterfaceAudience.Private
-public class RowNodeWriter{
- protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class);
-
- /********************* fields ******************************/
-
- protected PrefixTreeEncoder prefixTreeEncoder;
- protected PrefixTreeBlockMeta blockMeta;
- protected TokenizerNode tokenizerNode;
-
- protected int tokenWidth;
- protected int fanOut;
- protected int numCells;
-
- protected int width;
-
-
- /*********************** construct *************************/
-
- public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) {
- reconstruct(keyValueBuilder, tokenizerNode);
- }
-
- public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) {
- this.prefixTreeEncoder = prefixTreeEncoder;
- reset(tokenizerNode);
- }
-
- public void reset(TokenizerNode node) {
- this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks
- this.tokenizerNode = node;
- this.tokenWidth = 0;
- this.fanOut = 0;
- this.numCells = 0;
- this.width = 0;
- calculateOffsetsAndLengths();
- }
-
-
- /********************* methods ****************************/
-
- protected void calculateOffsetsAndLengths(){
- tokenWidth = tokenizerNode.getTokenLength();
- if(!tokenizerNode.isRoot()){
- --tokenWidth;//root has no parent
- }
- fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren());
- numCells = tokenizerNode.getNumOccurrences();
- }
-
- public int calculateWidth(){
- calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth());
- return width;
- }
-
- public int calculateWidthOverrideOffsetWidth(int offsetWidth){
- width = 0;
- width += UVIntTool.numBytes(tokenWidth);
- width += tokenWidth;
-
- width += UVIntTool.numBytes(fanOut);
- width += fanOut;
-
- width += UVIntTool.numBytes(numCells);
-
- if(tokenizerNode.hasOccurrences()){
- int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
- + blockMeta.getQualifierOffsetWidth()
- + blockMeta.getTimestampIndexWidth()
- + blockMeta.getMvccVersionIndexWidth()
- + blockMeta.getKeyValueTypeWidth()
- + blockMeta.getValueOffsetWidth()
- + blockMeta.getValueLengthWidth();
- width += numCells * fixedBytesPerCell;
- }
-
- if( ! tokenizerNode.isLeaf()){
- width += fanOut * offsetWidth;
- }
-
- return width;
- }
-
-
- /*********************** writing the compiled structure to the OutputStream ***************/
-
- public void write(OutputStream os) throws IOException{
- //info about this row trie node
- writeRowToken(os);
- writeFan(os);
- writeNumCells(os);
-
- //UFInt indexes and offsets for each cell in the row (if nub or leaf)
- writeFamilyNodeOffsets(os);
- writeQualifierNodeOffsets(os);
- writeTimestampIndexes(os);
- writeMvccVersionIndexes(os);
- writeCellTypes(os);
- writeValueOffsets(os);
- writeValueLengths(os);
-
- //offsets to the children of this row trie node (if branch or nub)
- writeNextRowTrieNodeOffsets(os);
- }
-
-
- /**
- * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3
- * fields can reproduce all the row keys that compose the block.
- */
-
- /**
- * UVInt: tokenWidth
- * bytes: token
- */
- protected void writeRowToken(OutputStream os) throws IOException {
- UVIntTool.writeBytes(tokenWidth, os);
- int tokenStartIndex = tokenizerNode.isRoot() ? 0 : 1;
- ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex);
- }
-
- /**
- * UVInt: numFanBytes/fanOut
- * bytes: each fan byte
- */
- public void writeFan(OutputStream os) throws IOException {
- UVIntTool.writeBytes(fanOut, os);
- if (fanOut <= 0) {
- return;
- }
- ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- os.write(child.getToken().get(0));// first byte of each child's token
- }
- }
-
- /**
- * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes
- */
- protected void writeNumCells(OutputStream os) throws IOException {
- UVIntTool.writeBytes(numCells, os);
- }
-
-
- /**
- * The following methods write data for each cell in the row, mostly consisting of indexes or
- * offsets into the timestamp/column data structures that are written in the middle of the block.
- * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary
- * search of a particular column/timestamp combination.
- *
- * Branch nodes will not have any data in these sections.
- */
-
- protected void writeFamilyNodeOffsets(OutputStream os) throws IOException {
- if (blockMeta.getFamilyOffsetWidth() <= 0) {
- return;
- }
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode
- .getFirstInsertionIndex() + i : 0;
- int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId(
- cellInsertionIndex);
- int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset(
- sortedIndex);
- UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os);
- }
- }
-
- protected void writeQualifierNodeOffsets(OutputStream os) throws IOException {
- if (blockMeta.getQualifierOffsetWidth() <= 0) {
- return;
- }
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
- int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId(
- cellInsertionIndex);
- int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset(
- sortedIndex);
- UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os);
- }
- }
-
- protected void writeTimestampIndexes(OutputStream os) throws IOException {
- if (blockMeta.getTimestampIndexWidth() <= 0) {
- return;
- }
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
- long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex];
- int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp);
- UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os);
- }
- }
-
- protected void writeMvccVersionIndexes(OutputStream os) throws IOException {
- if (blockMeta.getMvccVersionIndexWidth() <= 0) {
- return;
- }
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
- long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex];
- int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion);
- UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os);
- }
- }
-
- protected void writeCellTypes(OutputStream os) throws IOException {
- if (blockMeta.isAllSameType()) {
- return;
- }
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
- os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]);
- }
- }
-
- protected void writeValueOffsets(OutputStream os) throws IOException {
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
- long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex);
- UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os);
- }
- }
-
- protected void writeValueLengths(OutputStream os) throws IOException {
- for (int i = 0; i < numCells; ++i) {
- int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
- int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex);
- UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os);
- }
- }
-
-
- /**
- * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
- */
- protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException {
- ArrayList<TokenizerNode> children = tokenizerNode.getChildren();
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex();
- UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os);
- }
- }
-}
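
RowNodeWriter's calculateWidth() depends on blockMeta.getNextNodeOffsetWidth(), which is chosen by the same kind of search already seen in ColumnSectionWriter.compile() and repeated in RowSectionWriter.compile() below: the section size depends on the offset width, and the offset width must be able to address the section size, so widths are tried in increasing order until one fits. A standalone sketch of that search, with maxValueForNumBytes() assumed to equal 2^(8n) - 1:

    public class OffsetWidthSearchExample {
      // Assumed to mirror UFIntTool.maxValueForNumBytes().
      static long maxValueForNumBytes(int numBytes) {
        return (1L << (8 * numBytes)) - 1;
      }

      static int chooseOffsetWidth(int bytesWithoutOffsets, int numOffsets) {
        int width = 0;
        while (true) {
          ++width;
          long totalBytes = bytesWithoutOffsets + (long) numOffsets * width;
          if (totalBytes < maxValueForNumBytes(width)) {
            return width;                       // it fits
          }
        }
      }

      public static void main(String[] args) {
        // 300 bytes of node data plus 40 offsets: 1-byte offsets can only address 255 bytes,
        // so the search settles on 2-byte offsets (300 + 80 < 65535).
        System.out.println(chooseOffsetWidth(300, 40));   // 2
      }
    }
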
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
deleted file mode 100644
index f1dca8d..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.row;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta;
-import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
-import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
-import org.apache.hbase.util.vint.UFIntTool;
-
-import com.google.common.collect.Lists;
-
-/**
- * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row
- * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie
- * also contains references to offsets in the other sections of the data block that enable the
- * decoder to match a row key with its qualifier, timestamp, type, value, etc.
- *
- * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the
- * internals of each row node.
- */
-@InterfaceAudience.Private
-public class RowSectionWriter {
-
- /***************** fields **************************/
-
- protected PrefixTreeEncoder prefixTreeEncoder;
-
- protected PrefixTreeBlockMeta blockMeta;
-
- protected int numBytes;
-
- protected ArrayList<TokenizerNode> nonLeaves;
- protected ArrayList<TokenizerNode> leaves;
-
- protected ArrayList<RowNodeWriter> leafWriters;
- protected ArrayList<RowNodeWriter> nonLeafWriters;
-
- protected int numLeafWriters;
- protected int numNonLeafWriters;
-
-
- /********************* construct **********************/
-
- public RowSectionWriter() {
- this.nonLeaves = Lists.newArrayList();
- this.leaves = Lists.newArrayList();
- this.leafWriters = Lists.newArrayList();
- this.nonLeafWriters = Lists.newArrayList();
- }
-
- public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) {
- reconstruct(prefixTreeEncoder);
- }
-
- public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) {
- this.prefixTreeEncoder = prefixTreeEncoder;
- this.blockMeta = prefixTreeEncoder.getBlockMeta();
- reset();
- }
-
- public void reset() {
- numBytes = 0;
- nonLeaves.clear();
- leaves.clear();
- numLeafWriters = 0;
- numNonLeafWriters = 0;
- }
-
-
- /****************** methods *******************************/
-
- public RowSectionWriter compile() {
- blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength());
- prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes();
-
- prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false);
- prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true);
-
- // track the starting position of each node in final output
- int negativeIndex = 0;
-
- // create leaf writer nodes
- // leaf widths are known at this point, so add them up
- int totalLeafBytes = 0;
- for (int i = leaves.size() - 1; i >= 0; --i) {
- TokenizerNode leaf = leaves.get(i);
- RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf);
- ++numLeafWriters;
- // leaves store all but their first token byte
- int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0);
- totalLeafBytes += leafNodeWidth;
- negativeIndex += leafNodeWidth;
- leaf.setNegativeIndex(negativeIndex);
- }
-
- int totalNonLeafBytesWithoutOffsets = 0;
- int totalChildPointers = 0;
- for (int i = nonLeaves.size() - 1; i >= 0; --i) {
- TokenizerNode nonLeaf = nonLeaves.get(i);
- RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf);
- ++numNonLeafWriters;
- totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0);
- totalChildPointers += nonLeaf.getNumChildren();
- }
-
- // figure out how wide our offset FInts are
- int offsetWidth = 0;
- while (true) {
- ++offsetWidth;
- int offsetBytes = totalChildPointers * offsetWidth;
- int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes;
- if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) {
- // it fits
- numBytes = totalRowBytes;
- break;
- }
- }
- blockMeta.setNextNodeOffsetWidth(offsetWidth);
-
- // populate negativeIndexes
- for (int i = nonLeaves.size() - 1; i >= 0; --i) {
- TokenizerNode nonLeaf = nonLeaves.get(i);
- int writerIndex = nonLeaves.size() - i - 1;
- RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex);
- int nodeWidth = nonLeafWriter.calculateWidth();
- negativeIndex += nodeWidth;
- nonLeaf.setNegativeIndex(negativeIndex);
- }
-
- return this;
- }
-
- protected RowNodeWriter initializeWriter(List<RowNodeWriter> list, int index,
- TokenizerNode builderNode) {
- RowNodeWriter rowNodeWriter = null;
- //check if there is an existing node we can recycle
- if (index >= list.size()) {
- //there are not enough existing nodes, so add a new one which will be retrieved below
- list.add(new RowNodeWriter(prefixTreeEncoder, builderNode));
- }
- rowNodeWriter = list.get(index);
- rowNodeWriter.reset(builderNode);
- return rowNodeWriter;
- }
-
-
- public void writeBytes(OutputStream os) throws IOException {
- for (int i = numNonLeafWriters - 1; i >= 0; --i) {
- RowNodeWriter nonLeafWriter = nonLeafWriters.get(i);
- nonLeafWriter.write(os);
- }
- // duplicates above... written more for clarity right now
- for (int i = numLeafWriters - 1; i >= 0; --i) {
- RowNodeWriter leafWriter = leafWriters.get(i);
- leafWriter.write(os);
- }
- }
-
-
- /***************** static ******************************/
-
- protected static ArrayList<TokenizerNode> filterByLeafAndReverse(
- ArrayList<TokenizerNode> ins, boolean leaves) {
- ArrayList<TokenizerNode> outs = Lists.newArrayList();
- for (int i = ins.size() - 1; i >= 0; --i) {
- TokenizerNode n = ins.get(i);
- if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) {
- outs.add(ins.get(i));
- }
- }
- return outs;
- }
-
-
- /************* get/set **************************/
-
- public int getNumBytes() {
- return numBytes;
- }
-
- public ArrayList<TokenizerNode> getNonLeaves() {
- return nonLeaves;
- }
-
- public ArrayList<TokenizerNode> getLeaves() {
- return leaves;
- }
-
- public ArrayList<RowNodeWriter> getNonLeafWriters() {
- return nonLeafWriters;
- }
-
- public ArrayList<RowNodeWriter> getLeafWriters() {
- return leafWriters;
- }
-
-}
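The offset-width loop in compile() above picks the smallest fixed-width (UFInt) size whose maximum value can cover the whole row section, since every next-node offset must be able to reach any node written after it. Below is a self-contained sketch of that sizing rule; the byte counts are hypothetical and maxValueForNumBytes mirrors the UFIntTool helper that appears later in this patch.

    // Standalone sketch of the offset-width sizing used by compile(); not part of the patch.
    public class OffsetWidthSizingSketch {
      static long maxValueForNumBytes(int numBytes) {
        return (1L << (numBytes * 8)) - 1;
      }

      public static void main(String[] args) {
        int nonLeafBytesWithoutOffsets = 4000; // hypothetical totals
        int leafBytes = 9000;
        int childPointers = 700;

        int offsetWidth = 0;
        int rowSectionBytes;
        while (true) {
          ++offsetWidth;
          int offsetBytes = childPointers * offsetWidth;
          int totalRowBytes = nonLeafBytesWithoutOffsets + offsetBytes + leafBytes;
          if (totalRowBytes < maxValueForNumBytes(offsetWidth)) {
            rowSectionBytes = totalRowBytes; // every next-node offset now fits in offsetWidth bytes
            break;
          }
        }
        // Width 1 fails here (13,700 > 255) but width 2 fits (14,400 < 65,535),
        // so each child pointer would be written as a 2-byte UFInt.
        System.out.println("offsetWidth=" + offsetWidth + " rowSectionBytes=" + rowSectionBytes);
      }
    }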
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java
deleted file mode 100644
index e1082e0..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.tokenize;
-
-import java.util.Comparator;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Determines order of nodes in the output array. It may be possible to optimize further.
- */
-@InterfaceAudience.Private
-public class TokenDepthComparator implements Comparator<TokenizerNode> {
-
- @Override
- public int compare(TokenizerNode a, TokenizerNode b) {
- if(a==null){
- throw new IllegalArgumentException("a cannot be null");
- }
- if(b==null){
- throw new IllegalArgumentException("b cannot be null");
- }
-
- // put leaves at the end
- if (!a.isLeaf() && b.isLeaf()) {
- return -1;
- }
- if (a.isLeaf() && !b.isLeaf()) {
- return 1;
- }
-
- if (a.isLeaf() && b.isLeaf()) {// keep leaves in sorted order (for debugability)
- return a.getId() < b.getId() ? -1 : 1;
- }
-
- // compare depth
- if (a.getTokenOffset() < b.getTokenOffset()) {
- return -1;
- }
- if (a.getTokenOffset() > b.getTokenOffset()) {
- return 1;
- }
-
- // if same depth, return lower id first. ids are unique
- return a.getId() < b.getId() ? -1 : 1;
- }
-
-}
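Since node ids are unique, the comparator above yields a total order: branches and nubs come first, ordered by token offset (the comment calls it depth) with ties broken by id, and all leaves are grouped after them in id order. A usage fragment, assuming a Tokenizer that has already been fed its sorted inputs and the usual java.util imports:

    // Hypothetical usage; 'tokenizer' is a populated Tokenizer from this patch.
    List<TokenizerNode> nodes = tokenizer.getNodes(true, true);  // non-leaves and leaves
    Collections.sort(nodes, new TokenDepthComparator());         // branches/nubs first, leaves last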
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
deleted file mode 100644
index 9b43c47..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.tokenize;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-
-import com.google.common.collect.Lists;
-
-/**
- * Data structure used in the first stage of PrefixTree encoding:
- * <li>accepts a sorted stream of ByteRanges
- * <li>splits them into a set of tokens, each held by a {@link TokenizerNode}
- * <li>connects the TokenizerNodes via standard java references
- * <li>keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content
- *
- * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier
- * encoding.
- */
-@InterfaceAudience.Private
-public class Tokenizer{
-
- /***************** fields **************************/
-
- protected int numArraysAdded = 0;
- protected long lastNodeId = -1;
- protected ArrayList<TokenizerNode> nodes;
- protected int numNodes;
- protected TokenizerNode root;
- protected byte[] tokens;
- protected int tokensLength;
-
- protected int maxElementLength = 0;
- // number of levels in the tree assuming root level is 0
- protected int treeDepth = 0;
-
-
- /******************* construct *******************/
-
- public Tokenizer() {
- this.nodes = Lists.newArrayList();
- this.tokens = new byte[0];
- }
-
- public void reset() {
- numArraysAdded = 0;
- lastNodeId = -1;
- numNodes = 0;
- tokensLength = 0;
- root = null;
- maxElementLength = 0;
- treeDepth = 0;
- }
-
-
- /***************** building *************************/
-
- public void addAll(ArrayList<ByteRange> sortedByteRanges) {
- for (int i = 0; i < sortedByteRanges.size(); ++i) {
- ByteRange byteRange = sortedByteRanges.get(i);
- addSorted(byteRange);
- }
- }
-
- public void addSorted(final ByteRange bytes) {
- ++numArraysAdded;
- if (bytes.getLength() > maxElementLength) {
- maxElementLength = bytes.getLength();
- }
- if (root == null) {
- // nodeDepth of firstNode (non-root) is 1
- root = addNode(null, 1, 0, bytes, 0);
- } else {
- root.addSorted(bytes);
- }
- }
-
- public void incrementNumOccurrencesOfLatestValue(){
- CollectionUtils.getLast(nodes).incrementNumOccurrences(1);
- }
-
- protected long nextNodeId() {
- return ++lastNodeId;
- }
-
- protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset,
- final ByteRange token, int inputTokenOffset) {
- int inputTokenLength = token.getLength() - inputTokenOffset;
- int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset);
- TokenizerNode node = null;
- if (nodes.size() <= numNodes) {
- node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset,
- inputTokenLength);
- nodes.add(node);
- } else {
- node = nodes.get(numNodes);
- node.reset();
- node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength);
- }
- ++numNodes;
- return node;
- }
-
- protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) {
- int newOffset = tokensLength;
- int inputTokenLength = token.getLength() - inputTokenOffset;
- int newMinimum = tokensLength + inputTokenLength;
- tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum);
- token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength);
- tokensLength += inputTokenLength;
- return newOffset;
- }
-
- protected void submitMaxNodeDepthCandidate(int nodeDepth) {
- if (nodeDepth > treeDepth) {
- treeDepth = nodeDepth;
- }
- }
-
-
- /********************* read ********************/
-
- public int getNumAdded(){
- return numArraysAdded;
- }
-
- // for debugging
- public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves) {
- ArrayList<TokenizerNode> nodes = Lists.newArrayList();
- root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves);
- return nodes;
- }
-
- public void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves,
- boolean includeLeaves) {
- root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
- }
-
- public List<byte[]> getArrays() {
- List<TokenizerNode> nodes = new ArrayList<TokenizerNode>();
- root.appendNodesToExternalList(nodes, true, true);
- List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes));
- for (int i = 0; i < nodes.size(); ++i) {
- TokenizerNode node = nodes.get(i);
- for (int j = 0; j < node.getNumOccurrences(); ++j) {
- byte[] byteArray = node.getNewByteArray();
- byteArrays.add(byteArray);
- }
- }
- return byteArrays;
- }
-
- //currently unused, but working and possibly useful in the future
- public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
- int keyLength) {
- root.getNode(resultHolder, key, keyOffset, keyLength);
- }
-
-
- /********************** write ***************************/
-
- public Tokenizer setNodeFirstInsertionIndexes() {
- root.setInsertionIndexes(0);
- return this;
- }
-
- public Tokenizer appendOutputArrayOffsets(List<Integer> offsets) {
- root.appendOutputArrayOffsets(offsets);
- return this;
- }
-
-
- /********************* print/debug ********************/
-
- protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false;
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(getStructuralString());
- if (INCLUDE_FULL_TREE_IN_TO_STRING) {
- for (byte[] bytes : getArrays()) {
- if (sb.length() > 0) {
- sb.append("\n");
- }
- sb.append(Bytes.toString(bytes));
- }
- }
- return sb.toString();
- }
-
- public String getStructuralString() {
- List<TokenizerNode> nodes = getNodes(true, true);
- StringBuilder sb = new StringBuilder();
- for (TokenizerNode node : nodes) {
- String line = node.getPaddedTokenAndOccurrenceString();
- sb.append(line + "\n");
- }
- return sb.toString();
- }
-
-
- /****************** get/set ************************/
-
- public TokenizerNode getRoot() {
- return root;
- }
-
- public int getMaxElementLength() {
- return maxElementLength;
- }
-
- public int getTreeDepth() {
- return treeDepth;
- }
-
-}
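The three cases addSorted(..) has to handle (exact repeat, leaf-to-nub conversion, and prefix split) are easier to see without the ByteRange and node-pooling plumbing. The toy model below uses Strings and is not part of the patch; it only mirrors the branching logic described above and in TokenizerNode.

    import java.util.ArrayList;
    import java.util.List;

    // Toy tokenizer trie. Inputs must arrive in sorted order, so a new input can only
    // repeat the previous one, extend the last child, or force a prefix split.
    class MiniTrieNode {
      String token;
      int numOccurrences;
      List<MiniTrieNode> children = new ArrayList<MiniTrieNode>();

      MiniTrieNode(String token, int numOccurrences) {
        this.token = token;
        this.numOccurrences = numOccurrences;
      }

      void addSorted(String s) {
        int common = commonPrefixLength(token, s);
        if (common == token.length()) {
          String tail = s.substring(common);
          if (tail.isEmpty()) {
            ++numOccurrences;                              // case 1: exact repeat
            return;
          }
          if (!children.isEmpty()) {
            MiniTrieNode last = children.get(children.size() - 1);
            if (commonPrefixLength(last.token, tail) > 0) {
              last.addSorted(tail);                        // sorted input: only the last child can match
              return;
            }
          }
          children.add(new MiniTrieNode(tail, 1));         // case 2: leaf -> nub (new child leaf)
        } else {                                           // case 3: split off the shared prefix
          MiniTrieNode keep = new MiniTrieNode(token.substring(common), numOccurrences);
          keep.children = children;
          token = token.substring(0, common);
          numOccurrences = 0;
          children = new ArrayList<MiniTrieNode>();
          children.add(keep);
          children.add(new MiniTrieNode(s.substring(common), 1));
        }
      }

      static int commonPrefixLength(String a, String b) {
        int i = 0;
        while (i < a.length() && i < b.length() && a.charAt(i) == b.charAt(i)) {
          ++i;
        }
        return i;
      }
    }

Feeding it "BAA", "BAA", "BOO" produces a branch "B" (numOccurrences 0) with children "AA" (x2) and "OO" (x1), the same shape that TokenizerNode.split(..) builds.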
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java
deleted file mode 100644
index 2b8a86c..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.tokenize;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hadoop.hbase.util.Strings;
-
-import com.google.common.collect.Lists;
-
-/**
- * Individual node in a Trie structure. Each node is one of 3 types:
- * <li>Branch: an internal trie node that may have a token and must have multiple children, but does
- * not represent an actual input byte[], hence its numOccurrences is 0
- * <li>Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the
- * last bytes in the input byte[]s.
- * <li>Nub: a combination of a branch and leaf. Its token represents the last bytes of input
- * byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s
- * that add bytes to this node's input byte[].
- *
- * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2
- */
-@InterfaceAudience.Private
-public class TokenizerNode{
-
- /*
- * Ref to data structure wrapper
- */
- protected Tokenizer builder;
-
- /******************************************************************
- * Tree content/structure used during tokenization
- * ****************************************************************/
-
- /*
- * ref to parent trie node
- */
- protected TokenizerNode parent;
-
- /*
- * node depth in trie, irrespective of each node's token length
- */
- protected int nodeDepth;
-
- /*
- * start index of this token in original byte[]
- */
- protected int tokenStartOffset;
-
- /*
- * bytes for this trie node. can be length 0 in root node
- */
- protected ByteRange token;
-
- /*
- * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for
- * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing
- * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode.
- */
- protected int numOccurrences;
-
- /*
- * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256
- * child nodes.
- */
- protected ArrayList<TokenizerNode> children;
-
-
- /*
- * Fields used later in the encoding process for sorting the nodes into the order they'll be
- * written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer
- * are not generic data structures but instead are specific to HBase PrefixTree encoding.
- */
-
- /*
- * unique id assigned to each TokenizerNode
- */
- protected long id;
-
- /*
- * set >=0 for nubs and leaves
- */
- protected int firstInsertionIndex = -1;
-
- /*
- * A positive value indicating how many bytes before the end of the block this node will start. If
- * the section is 55 bytes and negativeOffset is 9, then the node will start at 46.
- */
- protected int negativeIndex = 0;
-
- /*
- * The offset in the output array at which to start writing this node's token bytes. Influenced
- * by the lengths of all tokens sorted before this one.
- */
- protected int outputArrayOffset = -1;
-
-
- /*********************** construct *****************************/
-
- public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth,
- int tokenStartOffset, int tokenOffset, int tokenLength) {
- this.token = new ByteRange();
- reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength);
- this.children = Lists.newArrayList();
- }
-
- /*
- * Sub-constructor for initializing all fields without allocating a new object. Used by the
- * regular constructor.
- */
- public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth,
- int tokenStartOffset, int tokenOffset, int tokenLength) {
- this.builder = builder;
- this.id = builder.nextNodeId();
- this.parent = parent;
- this.nodeDepth = nodeDepth;
- builder.submitMaxNodeDepthCandidate(nodeDepth);
- this.tokenStartOffset = tokenStartOffset;
- this.token.set(builder.tokens, tokenOffset, tokenLength);
- this.numOccurrences = 1;
- }
-
- /*
- * Clear the state of this node so that it looks like it was just allocated.
- */
- public void reset() {
- builder = null;
- parent = null;
- nodeDepth = 0;
- tokenStartOffset = 0;
- token.clear();
- numOccurrences = 0;
- children.clear();// branches & nubs
-
- // ids/offsets. used during writing to byte[]
- id = 0;
- firstInsertionIndex = -1;// set >=0 for nubs and leaves
- negativeIndex = 0;
- outputArrayOffset = -1;
- }
-
-
- /************************* building *********************************/
-
- /*
- * <li>Only public method used during the tokenization process
- * <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
- * inputs
- * <li>Only looks at bytes of the input array that align with this node's token
- */
- public void addSorted(final ByteRange bytes) {// recursively build the tree
-
- /*
- * Recurse deeper into the existing trie structure
- */
- if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) {
- TokenizerNode lastChild = CollectionUtils.getLast(children);
- if (lastChild.partiallyMatchesToken(bytes)) {
- lastChild.addSorted(bytes);
- return;
- }
- }
-
- /*
- * Recursion ended. We must either
- * <li>1: increment numOccurrences if this input was equal to the previous
- * <li>2: convert this node from a leaf to a nub, and add a new child leaf
- * <li>3: split this node into a branch and leaf, and then add a second leaf
- */
-
- // add it as a child of this node
- int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length
- int tailOffset = tokenStartOffset + numIdenticalTokenBytes;
- int tailLength = bytes.getLength() - tailOffset;
-
- if (numIdenticalTokenBytes == token.getLength()) {
- if (tailLength == 0) {// identical to this node (case 1)
- incrementNumOccurrences(1);
- } else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2)
- int childNodeDepth = nodeDepth + 1;
- int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes;
- TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset,
- bytes, tailOffset);
- addChild(newChildNode);
- }
- } else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3)
- split(numIdenticalTokenBytes, bytes);
- }
- }
-
-
- protected void addChild(TokenizerNode node) {
- node.setParent(this);
- children.add(node);
- }
-
-
- /**
- * Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the
- * method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output
- * will be 3 nodes:
- * <li>1: B <- branch
- * <li>2: AA <- leaf
- * <li>3: OO <- leaf
- *
- * @param numTokenBytesToRetain => 1 (the B)
- * @param bytes => BOO
- */
- protected void split(int numTokenBytesToRetain, final ByteRange bytes) {
- int childNodeDepth = nodeDepth;
- int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain;
-
- //create leaf AA
- TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
- token, numTokenBytesToRetain);
- firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences
- token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B
- numOccurrences = 0;//current node is now a branch
-
- moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B)
- addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children
-
- //create leaf OO
- TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset,
- bytes, tokenStartOffset + numTokenBytesToRetain);
- addChild(secondChild);//add the new leaf (OO) to the branch's (B's) children
-
- // we inserted branch node B as a new level above/before the two children, so increment the
- // depths of the children below
- firstChild.incrementNodeDepthRecursively();
- secondChild.incrementNodeDepthRecursively();
- }
-
-
- protected void incrementNodeDepthRecursively() {
- ++nodeDepth;
- builder.submitMaxNodeDepthCandidate(nodeDepth);
- for (int i = 0; i < children.size(); ++i) {
- children.get(i).incrementNodeDepthRecursively();
- }
- }
-
-
- protected void moveChildrenToDifferentParent(TokenizerNode newParent) {
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- child.setParent(newParent);
- newParent.children.add(child);
- }
- children.clear();
- }
-
-
- /************************ byte[] utils *************************/
-
- protected boolean partiallyMatchesToken(ByteRange bytes) {
- return numIdenticalBytes(bytes) > 0;
- }
-
- protected boolean matchesToken(ByteRange bytes) {
- return numIdenticalBytes(bytes) == getTokenLength();
- }
-
- protected int numIdenticalBytes(ByteRange bytes) {
- return token.numEqualPrefixBytes(bytes, tokenStartOffset);
- }
-
-
- /***************** moving nodes around ************************/
-
- public void appendNodesToExternalList(List<TokenizerNode> appendTo, boolean includeNonLeaves,
- boolean includeLeaves) {
- if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) {
- appendTo.add(this);
- }
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves);
- }
- }
-
- public int setInsertionIndexes(int nextIndex) {
- int newNextIndex = nextIndex;
- if (hasOccurrences()) {
- setFirstInsertionIndex(nextIndex);
- newNextIndex += numOccurrences;
- }
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- newNextIndex = child.setInsertionIndexes(newNextIndex);
- }
- return newNextIndex;
- }
-
- public void appendOutputArrayOffsets(List<Integer> offsets) {
- if (hasOccurrences()) {
- offsets.add(outputArrayOffset);
- }
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- child.appendOutputArrayOffsets(offsets);
- }
- }
-
-
- /***************** searching *********************************/
-
- /*
- * Do a trie style search through the tokenizer. One option for looking up families or qualifiers
- * during encoding, but currently unused in favor of tracking this information as they are added.
- *
- * Keeping code pending further performance testing.
- */
- public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset,
- int keyLength) {
- int thisNodeDepthPlusLength = tokenStartOffset + token.getLength();
-
- // quick check if the key is shorter than this node (may not work for binary search)
- if (CollectionUtils.isEmpty(children)) {
- if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes
- resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
- return;
- }
- }
-
- // all token bytes must match
- for (int i = 0; i < token.getLength(); ++i) {
- if (key[tokenStartOffset + keyOffset + i] != token.get(i)) {
- // TODO return whether it's before or after so we can binary search
- resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
- return;
- }
- }
-
- if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) {
- resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH
- return;
- }
-
- if (CollectionUtils.notEmpty(children)) {
- // TODO binary search the children
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- child.getNode(resultHolder, key, keyOffset, keyLength);
- if (resultHolder.isMatch()) {
- return;
- } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) {
- // passed it, so it doesn't exist
- resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
- return;
- }
- // key is still AFTER the current node, so continue searching
- }
- }
-
- // checked all children (or there were no children), and didn't find it
- resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null);
- return;
- }
-
-
- /****************** writing back to byte[]'s *************************/
-
- public byte[] getNewByteArray() {
- byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()];
- fillInBytes(arrayToFill);
- return arrayToFill;
- }
-
- public void fillInBytes(byte[] arrayToFill) {
- for (int i = 0; i < token.getLength(); ++i) {
- arrayToFill[tokenStartOffset + i] = token.get(i);
- }
- if (parent != null) {
- parent.fillInBytes(arrayToFill);
- }
- }
-
-
- /************************** printing ***********************/
-
- @Override
- public String toString() {
- String s = "";
- if (parent == null) {
- s += "R ";
- } else {
- s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray());
- }
- s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]";
- if (numOccurrences > 0) {
- s += "x" + numOccurrences;
- }
- return s;
- }
-
- public String getPaddedTokenAndOccurrenceString() {
- StringBuilder sb = new StringBuilder();
- sb.append(getBnlIndicator(true));
- sb.append(Strings.padFront(numOccurrences + "", ' ', 3));
- sb.append(Strings.padFront(nodeDepth + "", ' ', 3));
- if (outputArrayOffset >= 0) {
- sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3));
- }
- sb.append(" ");
- for (int i = 0; i < tokenStartOffset; ++i) {
- sb.append(" ");
- }
- sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_"));
- return sb.toString();
- }
-
- public String getBnlIndicator(boolean indent) {
- if (indent) {
- if (isNub()) {
- return " N ";
- }
- return isBranch() ? "B " : " L";
- }
- if (isNub()) {
- return "N";
- }
- return isBranch() ? "B" : "L";
- }
-
-
- /********************** count different node types ********************/
-
- public int getNumBranchNodesIncludingThisNode() {
- if (isLeaf()) {
- return 0;
- }
- int totalFromThisPlusChildren = isBranch() ? 1 : 0;
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode();
- }
- return totalFromThisPlusChildren;
- }
-
- public int getNumNubNodesIncludingThisNode() {
- if (isLeaf()) {
- return 0;
- }
- int totalFromThisPlusChildren = isNub() ? 1 : 0;
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode();
- }
- return totalFromThisPlusChildren;
- }
-
- public int getNumLeafNodesIncludingThisNode() {
- if (isLeaf()) {
- return 1;
- }
- int totalFromChildren = 0;
- for (int i = 0; i < children.size(); ++i) {
- TokenizerNode child = children.get(i);
- totalFromChildren += child.getNumLeafNodesIncludingThisNode();
- }
- return totalFromChildren;
- }
-
-
- /*********************** simple read-only methods *******************************/
-
- public int getNodeDepth() {
- return nodeDepth;
- }
-
- public int getTokenLength() {
- return token.getLength();
- }
-
- public boolean hasOccurrences() {
- return numOccurrences > 0;
- }
-
- public boolean isRoot() {
- return this.parent == null;
- }
-
- public int getNumChildren() {
- return CollectionUtils.nullSafeSize(children);
- }
-
- public TokenizerNode getLastChild() {
- if (CollectionUtils.isEmpty(children)) {
- return null;
- }
- return CollectionUtils.getLast(children);
- }
-
- public boolean isLeaf() {
- return CollectionUtils.isEmpty(children) && hasOccurrences();
- }
-
- public boolean isBranch() {
- return CollectionUtils.notEmpty(children) && !hasOccurrences();
- }
-
- public boolean isNub() {
- return CollectionUtils.notEmpty(children) && hasOccurrences();
- }
-
-
- /********************** simple mutation methods *************************/
-
- /**
- * Each occurrence > 1 indicates a repeat of the previous entry. This can be called directly by
- * an external class without going through the process of detecting a repeat if it is a known
- * repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows
- * the new cells are part of the current row.
- * @param d increment by this amount
- */
- public void incrementNumOccurrences(int d) {
- numOccurrences += d;
- }
-
-
- /************************* autogenerated get/set ******************/
-
- public int getTokenOffset() {
- return tokenStartOffset;
- }
-
- public TokenizerNode getParent() {
- return parent;
- }
-
- public ByteRange getToken() {
- return token;
- }
-
- public int getNumOccurrences() {
- return numOccurrences;
- }
-
- public void setParent(TokenizerNode parent) {
- this.parent = parent;
- }
-
- public void setNumOccurrences(int numOccurrences) {
- this.numOccurrences = numOccurrences;
- }
-
- public ArrayList<TokenizerNode> getChildren() {
- return children;
- }
-
- public long getId() {
- return id;
- }
-
- public int getFirstInsertionIndex() {
- return firstInsertionIndex;
- }
-
- public void setFirstInsertionIndex(int firstInsertionIndex) {
- this.firstInsertionIndex = firstInsertionIndex;
- }
-
- public int getNegativeIndex() {
- return negativeIndex;
- }
-
- public void setNegativeIndex(int negativeIndex) {
- this.negativeIndex = negativeIndex;
- }
-
- public int getOutputArrayOffset() {
- return outputArrayOffset;
- }
-
- public void setOutputArrayOffset(int outputArrayOffset) {
- this.outputArrayOffset = outputArrayOffset;
- }
-
- public void setId(long id) {
- this.id = id;
- }
-
- public void setBuilder(Tokenizer builder) {
- this.builder = builder;
- }
-
- public void setTokenOffset(int tokenOffset) {
- this.tokenStartOffset = tokenOffset;
- }
-
- public void setToken(ByteRange token) {
- this.token = token;
- }
-
-}
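getNewByteArray() and fillInBytes() above rebuild an original input by letting each node write only its own token and delegating the leading bytes to its parent chain. A stripped-down, self-contained version of the same idea (class and names are illustrative, not from the patch):

    // Minimal model of reconstructing a full key from a leaf's parent chain.
    class PathNode {
      final PathNode parent;
      final int tokenStartOffset; // where this node's token begins in the full key
      final byte[] token;

      PathNode(PathNode parent, int tokenStartOffset, byte[] token) {
        this.parent = parent;
        this.tokenStartOffset = tokenStartOffset;
        this.token = token;
      }

      byte[] getNewByteArray() {
        byte[] out = new byte[tokenStartOffset + token.length];
        fillInBytes(out);
        return out;
      }

      void fillInBytes(byte[] out) {
        System.arraycopy(token, 0, out, tokenStartOffset, token.length);
        if (parent != null) {
          parent.fillInBytes(out); // parent fills bytes [0, tokenStartOffset)
        }
      }
    }

With the BAA/BOO example from split(..): branch "B" at offset 0 and leaf "OO" at offset 1, so calling getNewByteArray() on the leaf reconstructs the bytes of "BOO".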
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java
deleted file mode 100644
index 6494ba1..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.tokenize;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-
-/**
- * Warning: currently unused, but code is valid. Pending performance testing on more data sets.
- *
- * Where is the key relative to our current position in the tree. For example, the current tree node
- * is "BEFORE" the key we are seeking
- */
-@InterfaceAudience.Private
-public enum TokenizerRowSearchPosition {
-
- AFTER,//the key is after this tree node, so keep searching
- BEFORE,//in a binary search, this tells us to back up
- MATCH,//the current node is a full match
- NO_MATCH,//might as well return a value more informative than null
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java
deleted file mode 100644
index e7f5433..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.encode.tokenize;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-
-/**
- * for recursively searching a PtBuilder
- */
-@InterfaceAudience.Private
-public class TokenizerRowSearchResult{
-
- /************ fields ************************/
-
- protected TokenizerRowSearchPosition difference;
- protected TokenizerNode matchingNode;
-
-
- /*************** construct *****************/
-
- public TokenizerRowSearchResult() {
- }
-
- public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) {
- this.difference = difference;
- }
-
- public TokenizerRowSearchResult(TokenizerNode matchingNode) {
- this.difference = TokenizerRowSearchPosition.MATCH;
- this.matchingNode = matchingNode;
- }
-
-
- /*************** methods **********************/
-
- public boolean isMatch() {
- return TokenizerRowSearchPosition.MATCH == difference;
- }
-
-
- /************* get/set ***************************/
-
- public TokenizerRowSearchPosition getDifference() {
- return difference;
- }
-
- public TokenizerNode getMatchingNode() {
- return matchingNode;
- }
-
- public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) {
- this.difference = difference;
- this.matchingNode = matchingNode;
- }
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScannerPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScannerPosition.java
deleted file mode 100644
index a8f0541..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScannerPosition.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.scanner;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-
-/**
- * An indicator of the state of the scanner after an operation such as nextCell() or
- * positionAt(..). For example:
- *
- * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
- * it should load the next block.
- * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.
- *
- * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
- * next region.
- *
- */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
-public enum CellScannerPosition {
-
- /**
- * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first
- * cell.
- */
- BEFORE_FIRST,
-
- /**
- * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
- * rather it is the nearest cell before the requested cell.
- */
- BEFORE,
-
- /**
- * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by
- * positionAt(..).
- */
- AT,
-
- /**
- * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..),
- * rather it is the nearest cell after the requested cell.
- */
- AFTER,
-
- /**
- * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect.
- */
- AFTER_LAST
-
-}
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java
deleted file mode 100644
index e55c559..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.scanner;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.Cell;
-
-/**
- * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that
- * the implementation is able to navigate between cells without iterating through every cell.
- */
-@InterfaceAudience.Private
-public interface CellSearcher extends ReversibleCellScanner {
- /**
- * Reset any state in the scanner so it appears it was freshly opened.
- */
- void resetToBeforeFirstEntry();
-
- /**
- * Do everything within this scanner's power to find the key. Look forward and backwards.
- *
- * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state.
- *
- * @param key position the CellScanner exactly on this key
- * @return true if the cell existed and getCurrentCell() holds a valid cell
- */
- boolean positionAt(Cell key);
-
- /**
- * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no
- * exact match.
- *
- * @param key position the CellScanner on this key or the closest cell before
- * @return AT if exact match
- * BEFORE if on last cell before key
- * BEFORE_FIRST if key was before the first cell in this scanner's scope
- */
- CellScannerPosition positionAtOrBefore(Cell key);
-
- /**
- * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact
- * match.
- *
- * @param key position the CellScanner on this key or the closest cell after
- * @return AT if exact match
- * AFTER if on first cell after key
- * AFTER_LAST if key was after the last cell in this scanner's scope
- */
- CellScannerPosition positionAtOrAfter(Cell key);
-
- /**
- * Note: Added for backwards compatibility with
- * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek}
- *
- * Look for the key, but only look after the current position. Probably not needed for an
- * efficient tree implementation, but is important for implementations without random access such
- * as unencoded KeyValue blocks.
- *
- * @param key position the CellScanner exactly on this key
- * @return true if getCurrent() holds a valid cell
- */
- boolean seekForwardTo(Cell key);
-
- /**
- * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
- * exact match.
- *
- * @param key
- * @return AT if exact match
- * AFTER if on first cell after key
- * AFTER_LAST if key was after the last cell in this scanner's scope
- */
- CellScannerPosition seekForwardToOrBefore(Cell key);
-
- /**
- * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no
- * exact match.
- *
- * @param key
- * @return AT if exact match
- * AFTER if on first cell after key
- * AFTER_LAST if key was after the last cell in this scanner's scope
- */
- CellScannerPosition seekForwardToOrAfter(Cell key);
-
- /**
- * Note: This may not be appropriate to have in the interface. Need to investigate.
- *
- * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST.
- * This is used by tests and for handling certain edge cases.
- */
- void positionAfterLastCell();
-
-}
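As a rough sketch of how a caller is expected to interpret these positioning results: getCurrentCell() is the accessor the Javadoc above refers to, and the enclosing class, imports and helper name below are assumptions rather than part of the patch.

    // Fragment: an inclusive forward seek built on positionAtOrAfter(..).
    Cell seekInclusive(CellSearcher searcher, Cell key) {
      CellScannerPosition position = searcher.positionAtOrAfter(key);
      switch (position) {
        case AT:         // scanner is exactly on the requested cell
        case AFTER:      // no exact match; scanner is on the nearest following cell
          return searcher.getCurrentCell();
        case AFTER_LAST: // key sorts after everything in this scanner's scope
        default:
          return null;   // caller should move on to the next block/file/region
      }
    }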
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java
deleted file mode 100644
index b4463d8..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.codec.prefixtree.scanner;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.CellScanner;
-
-/**
- * An extension of CellScanner indicating the scanner supports iterating backwards through cells.
- *
- * Note: This was not added to suggest that HBase should support client facing reverse Scanners,
- * but
- * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing
- * up if the positionAt(..) method goes past the requested cell.
- */
-@InterfaceAudience.Private
-public interface ReversibleCellScanner extends CellScanner {
-
- /**
- * Try to position the scanner one Cell before the current position.
- * @return true if the operation was successful, meaning getCurrentCell() will return a valid
- * Cell.
- * false if there were no previous cells, meaning getCurrentCell() will return null.
- * Scanner position will be
- * {@link org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
- */
- boolean previous();
-
- /**
- * Try to position the scanner in the row before the current row.
- * @param endOfRow true for the last cell in the previous row; false for the first cell
- * @return true if the operation was successful, meaning getCurrentCell() will return a valid
- * Cell.
- * false if there were no previous cells, meaning getCurrentCell() will return null.
- * Scanner position will be
- * {@link org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST}
- */
- boolean previousRow(boolean endOfRow);
-}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java
deleted file mode 100644
index b2d1526..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.util.byterange;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ArrayUtils;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.Bytes;
-
-import com.google.common.collect.Lists;
-
-/**
- * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
- * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
- *
- * Current implementations are {@link org.apache.hbase.util.byterange.impl.ByteRangeHashSet} and
- * {@link org.apache.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
- * trie-oriented ByteRangeTrieSet, etc
- */
-@InterfaceAudience.Private
-public abstract class ByteRangeSet {
-
- /******************** fields **********************/
-
- protected byte[] byteAppender;
- protected int numBytes;
-
- protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;
-
- protected ArrayList<ByteRange> uniqueRanges;
- protected int numUniqueRanges = 0;
-
- protected int[] uniqueRangeIndexByInsertionId;
- protected int numInputs;
-
- protected List<Integer> sortedIndexByUniqueIndex;
- protected int[] sortedIndexByInsertionId;
- protected ArrayList<ByteRange> sortedRanges;
-
-
- /****************** construct **********************/
-
- protected ByteRangeSet() {
- this.byteAppender = new byte[0];
- this.uniqueRanges = Lists.newArrayList();
- this.uniqueRangeIndexByInsertionId = new int[0];
- this.sortedIndexByUniqueIndex = Lists.newArrayList();
- this.sortedIndexByInsertionId = new int[0];
- this.sortedRanges = Lists.newArrayList();
- }
-
- public void reset() {
- numBytes = 0;
- uniqueIndexByUniqueRange.clear();
- numUniqueRanges = 0;
- numInputs = 0;
- sortedIndexByUniqueIndex.clear();
- sortedRanges.clear();
- }
-
-
- /*************** abstract *************************/
-
- public abstract void addToSortedRanges();
-
-
- /**************** methods *************************/
-
- /**
- * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
- * insert it into the tracking Map uniqueIndexByUniqueRange.
- */
- public void add(ByteRange bytes) {
- Integer index = uniqueIndexByUniqueRange.get(bytes);
- if (index == null) {
- index = store(bytes);
- }
- int minLength = numInputs + 1;
- uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
- minLength, 2 * minLength);
- uniqueRangeIndexByInsertionId[numInputs] = index;
- ++numInputs;
- }
-
- protected int store(ByteRange bytes) {
- int indexOfNewElement = numUniqueRanges;
- if (uniqueRanges.size() <= numUniqueRanges) {
- uniqueRanges.add(new ByteRange());
- }
- ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
- int neededBytes = numBytes + bytes.getLength();
- byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
- bytes.deepCopyTo(byteAppender, numBytes);
- storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
- numBytes += bytes.getLength();
- uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
- int newestUniqueIndex = numUniqueRanges;
- ++numUniqueRanges;
- return newestUniqueIndex;
- }
-
- public ByteRangeSet compile() {
- addToSortedRanges();
- for (int i = 0; i < sortedRanges.size(); ++i) {
- sortedIndexByUniqueIndex.add(null);// need to grow the size
- }
- // TODO move this to an invert(int[]) util method
- for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
- int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
- sortedIndexByUniqueIndex.set(uniqueIndex, i);
- }
- sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
- numInputs);
- for (int i = 0; i < numInputs; ++i) {
- int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
- int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
- sortedIndexByInsertionId[i] = sortedIndex;
- }
- return this;
- }
-
- public int getSortedIndexForInsertionId(int insertionId) {
- return sortedIndexByInsertionId[insertionId];
- }
-
- public int size() {
- return uniqueIndexByUniqueRange.size();
- }
-
-
- /***************** standard methods ************************/
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- int i = 0;
- for (ByteRange r : sortedRanges) {
- if (i > 0) {
- sb.append("\n");
- }
- sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray()));
- ++i;
- }
- sb.append("\ntotalSize:" + numBytes);
- sb.append("\navgSize:" + getAvgSize());
- return sb.toString();
- }
-
-
- /**************** get/set *****************************/
-
- public ArrayList<ByteRange> getSortedRanges() {
- return sortedRanges;
- }
-
- public long getAvgSize() {
- return numBytes / numUniqueRanges;
- }
-
-}
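The bookkeeping in add() and compile() boils down to three mappings: value to unique index, insertion id to unique index, and unique index to sorted index. The toy version below uses Strings in place of ByteRanges purely to show the index arithmetic; it is not the patch's API.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Toy model of ByteRangeSet's index bookkeeping.
    class MiniRangeSet {
      private final Map<String, Integer> uniqueIndexByValue = new HashMap<String, Integer>();
      private final List<String> uniqueValues = new ArrayList<String>();
      private final List<Integer> uniqueIndexByInsertionId = new ArrayList<Integer>();

      void add(String value) {
        Integer uniqueIndex = uniqueIndexByValue.get(value);
        if (uniqueIndex == null) {                 // first time we see this value: store it once
          uniqueIndex = uniqueValues.size();
          uniqueValues.add(value);
          uniqueIndexByValue.put(value, uniqueIndex);
        }
        uniqueIndexByInsertionId.add(uniqueIndex); // remember which unique value this insertion used
      }

      int[] compile() {
        List<String> sorted = new ArrayList<String>(uniqueValues);
        Collections.sort(sorted);
        Map<String, Integer> sortedIndexByValue = new HashMap<String, Integer>();
        for (int i = 0; i < sorted.size(); ++i) {
          sortedIndexByValue.put(sorted.get(i), i);
        }
        int[] sortedIndexByInsertionId = new int[uniqueIndexByInsertionId.size()];
        for (int i = 0; i < sortedIndexByInsertionId.length; ++i) {
          String value = uniqueValues.get(uniqueIndexByInsertionId.get(i));
          sortedIndexByInsertionId[i] = sortedIndexByValue.get(value);
        }
        return sortedIndexByInsertionId;
      }
    }

For example, add("cf2"), add("cf1"), add("cf2") followed by compile() returns [1, 0, 1]: each distinct value is stored once, and every insertion keeps only the index of its value in sorted order, which is what the encoder writes per cell for families and qualifiers.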
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java
deleted file mode 100644
index 8787f39..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.util.byterange.impl;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hadoop.hbase.util.IterableUtils;
-import org.apache.hbase.util.byterange.ByteRangeSet;
-
-/**
- * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces
- * garbage when adding a new element to it. We can probably create a tighter implementation without
- * pointers or garbage.
- */
-@InterfaceAudience.Private
-public class ByteRangeHashSet extends ByteRangeSet {
-
- /************************ constructors *****************************/
-
- public ByteRangeHashSet() {
- this.uniqueIndexByUniqueRange = new HashMap<ByteRange, Integer>();
- }
-
- public ByteRangeHashSet(List<ByteRange> rawByteArrays) {
- for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
- add(in);
- }
- }
-
- @Override
- public void addToSortedRanges() {
- sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
- Collections.sort(sortedRanges);
- }
-
-}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java
deleted file mode 100644
index 9499e56..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.util.byterange.impl;
-
-import java.util.List;
-import java.util.TreeMap;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.util.ByteRange;
-import org.apache.hadoop.hbase.util.CollectionUtils;
-import org.apache.hadoop.hbase.util.IterableUtils;
-import org.apache.hbase.util.byterange.ByteRangeSet;
-
-/**
- * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet.
- */
-@InterfaceAudience.Private
-public class ByteRangeTreeSet extends ByteRangeSet {
-
- /************************ constructors *****************************/
-
- public ByteRangeTreeSet() {
- this.uniqueIndexByUniqueRange = new TreeMap<ByteRange, Integer>();
- }
-
- public ByteRangeTreeSet(List<ByteRange> rawByteArrays) {
- this();// needed to initialize the backing TreeMap
- for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) {
- add(in);
- }
- }
-
- @Override
- public void addToSortedRanges() {
- sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet()));
- }
-
-}
\ No newline at end of file
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java
deleted file mode 100644
index 278ac55..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.util.vint;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * UFInt is an abbreviation for Unsigned Fixed-width Integer.
- *
- * This class converts between positive ints and 1-4 bytes that represent the int. All input ints
- * must be positive. Max values stored in N bytes are:
- *
- * N=1: 2^8  - 1 =>           255
- * N=2: 2^16 - 1 =>        65,535
- * N=3: 2^24 - 1 =>    16,777,215
- * N=4: 2^31 - 1 => 2,147,483,647 (Integer.MAX_VALUE)
- *
- * This was created to get most of the memory savings of a variable length integer when encoding
- * an array of input integers, but to fix the number of bytes for each integer to the number needed
- * to store the maximum integer in the array. This enables a binary search to be performed on the
- * array of encoded integers.
- *
- * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if
- * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the
- * numbers will also require 2 bytes.
- *
- * warnings:
- * * no input validation for max performance
- * * no negatives
- */
-@InterfaceAudience.Private
-public class UFIntTool {
-
- private static final int NUM_BITS_IN_LONG = 64;
-
- public static long maxValueForNumBytes(int numBytes) {
- return (1L << (numBytes * 8)) - 1;
- }
-
- public static int numBytes(final long value) {
- if (value == 0) {// 0 doesn't work with the formula below
- return 1;
- }
- return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8;
- }
-
- public static byte[] getBytes(int outputWidth, final long value) {
- byte[] bytes = new byte[outputWidth];
- writeBytes(outputWidth, value, bytes, 0);
- return bytes;
- }
-
- public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) {
- bytes[offset + outputWidth - 1] = (byte) value;
- for (int i = outputWidth - 2; i >= 0; --i) {
- bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8);
- }
- }
-
- private static final long[] MASKS = new long[] {
- (long) 255,
- (long) 255 << 8,
- (long) 255 << 16,
- (long) 255 << 24,
- (long) 255 << 32,
- (long) 255 << 40,
- (long) 255 << 48,
- (long) 255 << 56
- };
-
- public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException {
- for (int i = outputWidth - 1; i >= 0; --i) {
- os.write((byte) ((value & MASKS[i]) >>> (8 * i)));
- }
- }
-
- public static long fromBytes(final byte[] bytes) {
- long value = 0;
- value |= bytes[0] & 0xff;// & 0xff keeps the low 8 bits so the byte's sign does not leak into the long
- for (int i = 1; i < bytes.length; ++i) {
- value <<= 8;
- value |= bytes[i] & 0xff;
- }
- return value;
- }
-
- public static long fromBytes(final byte[] bytes, final int offset, final int width) {
- long value = 0;
- value |= bytes[0 + offset] & 0xff;// & 0xff keeps the low 8 bits so the byte's sign does not leak into the long
- for (int i = 1; i < width; ++i) {
- value <<= 8;
- value |= bytes[i + offset] & 0xff;
- }
- return value;
- }
-
-}
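For orientation, here is a minimal round-trip sketch of the fixed-width encoding UFIntTool implements: every value in an array is written with the width needed by the largest value, so any element can later be read directly by index without scanning. The wrapper class name and the sample offsets are illustrative, and the import assumes the class ends up under org.apache.hadoop.hbase.util.vint after this package move.

import org.apache.hadoop.hbase.util.vint.UFIntTool;

public class UFIntToolExample {
  public static void main(String[] args) {
    long[] offsets = { 3, 200, 70000 };       // sample block offsets
    int width = UFIntTool.numBytes(70000);    // widest value needs 3 bytes
    byte[] encoded = new byte[width * offsets.length];
    for (int i = 0; i < offsets.length; i++) {
      // every element gets the same width, so element i starts at i * width
      UFIntTool.writeBytes(width, offsets[i], encoded, i * width);
    }
    // jump straight to element index 1 (offset 1 * width); no varint scanning needed
    long middle = UFIntTool.fromBytes(encoded, width, width);
    System.out.println(middle);               // prints 200
  }
}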
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java
deleted file mode 100644
index c0d29e4..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.util.vint;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Simple Variable Length Integer encoding. A leading bit of 0 means this is the last byte; a
- * leading bit of 1 means at least one more byte follows.
- */
-@InterfaceAudience.Private
-public class UVIntTool {
-
- public static final byte
- BYTE_7_RIGHT_BITS_SET = 127,
- BYTE_LEFT_BIT_SET = -128;
-
- public static final long
- INT_7_RIGHT_BITS_SET = 127,
- INT_8TH_BIT_SET = 128;
-
- public static final byte[]
- MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 };
-
- /********************* int -> bytes **************************/
-
- public static int numBytes(int in) {
- if (in == 0) {
- // doesn't work with the formula below
- return 1;
- }
- return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1)
- }
-
- public static byte[] getBytes(int value) {
- int numBytes = numBytes(value);
- byte[] bytes = new byte[numBytes];
- int remainder = value;
- for (int i = 0; i < numBytes - 1; ++i) {
- // set the left bit
- bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET);
- remainder >>= 7;
- }
- // do not set the left bit
- bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET);
- return bytes;
- }
-
- public static int writeBytes(int value, OutputStream os) throws IOException {
- int numBytes = numBytes(value);
- int remainder = value;
- for (int i = 0; i < numBytes - 1; ++i) {
- // set the left bit
- os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET));
- remainder >>= 7;
- }
- // do not set the left bit
- os.write((byte) (remainder & INT_7_RIGHT_BITS_SET));
- return numBytes;
- }
-
- /******************** bytes -> int **************************/
-
- public static int getInt(byte[] bytes) {
- return getInt(bytes, 0);
- }
-
- public static int getInt(byte[] bytes, int offset) {
- int value = 0;
- for (int i = 0;; ++i) {
- byte b = bytes[offset + i];
- int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
- shifted <<= 7 * i;
- value |= shifted;
- if (b >= 0) {
- break;
- }
- }
- return value;
- }
-
- public static int getInt(InputStream is) throws IOException {
- int value = 0;
- int i = 0;
- int b;
- do{
- b = is.read();
- int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
- shifted <<= 7 * i;
- value |= shifted;
- ++i;
- }while(b > Byte.MAX_VALUE);
- return value;
- }
-}
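A short sketch of how the UVIntTool varint above behaves on the wire: small values cost a single byte, and each additional 7 bits of magnitude adds one more byte, with the low-order 7 bits written first. The wrapper class name and the sample values are illustrative only.

import java.io.ByteArrayOutputStream;

import org.apache.hadoop.hbase.util.vint.UVIntTool;

public class UVIntToolExample {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    UVIntTool.writeBytes(5, out);    // one byte: 0x05
    UVIntTool.writeBytes(300, out);  // two bytes: 0xAC 0x02 (low 7 bits first, high bit = continuation)
    byte[] bytes = out.toByteArray();

    int first = UVIntTool.getInt(bytes, 0);                           // 5
    int second = UVIntTool.getInt(bytes, UVIntTool.numBytes(first));  // 300
    System.out.println(first + " " + second);
  }
}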
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java
deleted file mode 100644
index ec95ae8..0000000
--- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hbase.util.vint;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Simple Variable Length Integer encoding, applied to unsigned long values. A leading bit of 0
- * means this is the last byte; a leading bit of 1 means at least one more byte follows.
- */
-@InterfaceAudience.Private
-public class UVLongTool{
-
- public static final byte
- BYTE_7_RIGHT_BITS_SET = 127,
- BYTE_LEFT_BIT_SET = -128;
-
- public static final long
- LONG_7_RIGHT_BITS_SET = 127,
- LONG_8TH_BIT_SET = 128;
-
- public static final byte[]
- MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 };
-
-
- /********************* long -> bytes **************************/
-
- public static int numBytes(long in) {// do a check for illegal arguments if not protected
- if (in == 0) {
- return 1;
- }// doesn't work with the formula below
- return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1)
- }
-
- public static byte[] getBytes(long value) {
- int numBytes = numBytes(value);
- byte[] bytes = new byte[numBytes];
- long remainder = value;
- for (int i = 0; i < numBytes - 1; ++i) {
- bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit
- remainder >>= 7;
- }
- bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit
- return bytes;
- }
-
- public static int writeBytes(long value, OutputStream os) throws IOException {
- int numBytes = numBytes(value);
- long remainder = value;
- for (int i = 0; i < numBytes - 1; ++i) {
- // set the left bit
- os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET));
- remainder >>= 7;
- }
- // do not set the left bit
- os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET));
- return numBytes;
- }
-
- /******************** bytes -> long **************************/
-
- public static long getLong(byte[] bytes) {
- return getLong(bytes, 0);
- }
-
- public static long getLong(byte[] bytes, int offset) {
- long value = 0;
- for (int i = 0;; ++i) {
- byte b = bytes[offset + i];
- long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
- shifted <<= 7 * i;
- value |= shifted;
- if (b >= 0) {
- break;
- }// first bit was 0, so that's the last byte in the VarLong
- }
- return value;
- }
-
- public static long getLong(InputStream is) throws IOException {
- long value = 0;
- int i = 0;
- int b;
- do {
- b = is.read();
- long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit
- shifted <<= 7 * i;
- value |= shifted;
- ++i;
- } while (b > Byte.MAX_VALUE);
- return value;
- }
-}
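UVLongTool applies the same scheme to 64-bit values; the practical win is that values with few significant bits, for example a recent epoch-millisecond timestamp (about 41 bits), shrink from 8 fixed bytes to 6. The sample value and wrapper class below are illustrative.

import org.apache.hadoop.hbase.util.vint.UVLongTool;

public class UVLongToolExample {
  public static void main(String[] args) {
    long millis = 1350000000000L;                     // sample epoch-millisecond timestamp
    byte[] encoded = UVLongTool.getBytes(millis);
    System.out.println(encoded.length);               // 6 bytes instead of a fixed 8
    System.out.println(UVLongTool.getLong(encoded));  // 1350000000000
  }
}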
diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java
new file mode 100644
index 0000000..5bc4186
--- /dev/null
+++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.codec.keyvalue;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValueTestUtil;
+import org.apache.hadoop.hbase.codec.prefixtree.row.TestRowData;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class TestKeyValueTool {
+
+ @Parameters
+ public static Collection