diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java new file mode 100644 index 0000000..0164306 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeBlockMeta.java @@ -0,0 +1,841 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.vint.UVIntTool; +import org.apache.hadoop.hbase.util.vint.UVLongTool; + +/** + * Information about the block. Stored at the beginning of the byte[]. Contains things + * like minimum timestamp and width of FInts in the row tree. + * + * Most fields stored in VInts that get decoded on the first access of each new block. 
+ */ +@InterfaceAudience.Private +public class PrefixTreeBlockMeta { + + /******************* static fields ********************/ + + public static final int VERSION = 0; + + public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue + + public static final int + NUM_LONGS = 2, + NUM_INTS = 22, + NUM_SHORTS = 0,//keyValueTypeWidth not persisted + NUM_SINGLE_BYTES = 2, + MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS + + Bytes.SIZEOF_SHORT * NUM_SHORTS + + Bytes.SIZEOF_INT * NUM_INTS + + NUM_SINGLE_BYTES; + + + /**************** transient fields *********************/ + + protected int arrayOffset; + protected int bufferOffset; + + + /**************** persisted fields **********************/ + + // PrefixTree version to allow future format modifications + protected int version; + protected int numMetaBytes; + protected int numKeyValueBytes; + protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte + + // split the byte[] into 6 sections for the different data types + protected int numRowBytes; + protected int numFamilyBytes; + protected int numQualifierBytes; + protected int numTimestampBytes; + protected int numMvccVersionBytes; + protected int numValueBytes; + + // number of bytes in each section of fixed width FInts + protected int nextNodeOffsetWidth; + protected int familyOffsetWidth; + protected int qualifierOffsetWidth; + protected int timestampIndexWidth; + protected int mvccVersionIndexWidth; + protected int valueOffsetWidth; + protected int valueLengthWidth; + + // used to pre-allocate structures for reading + protected int rowTreeDepth; + protected int maxRowLength; + protected int maxQualifierLength; + + // the timestamp from which the deltas are calculated + protected long minTimestamp; + protected int timestampDeltaWidth; + protected long minMvccVersion; + protected int mvccVersionDeltaWidth; + + protected boolean allSameType; + protected byte allTypes; + + protected int numUniqueRows; + protected int numUniqueFamilies; + protected int numUniqueQualifiers; + + + /***************** constructors ********************/ + + public PrefixTreeBlockMeta() { + } + + public PrefixTreeBlockMeta(InputStream is) throws IOException{ + this.version = VERSION; + this.arrayOffset = 0; + this.bufferOffset = 0; + readVariableBytesFromInputStream(is); + } + + /** + * @param buffer positioned at start of PtBlockMeta + */ + public PrefixTreeBlockMeta(ByteBuffer buffer) { + initOnBlock(buffer); + } + + public void initOnBlock(ByteBuffer buffer) { + arrayOffset = buffer.arrayOffset(); + bufferOffset = buffer.position(); + readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset); + } + + + /**************** operate on each field **********************/ + + public int calculateNumMetaBytes(){ + int numBytes = 0; + numBytes += UVIntTool.numBytes(version); + numBytes += UVLongTool.numBytes(numMetaBytes); + numBytes += UVIntTool.numBytes(numKeyValueBytes); + ++numBytes;//os.write(getIncludesMvccVersion()); + + numBytes += UVIntTool.numBytes(numRowBytes); + numBytes += UVIntTool.numBytes(numFamilyBytes); + numBytes += UVIntTool.numBytes(numQualifierBytes); + numBytes += UVIntTool.numBytes(numTimestampBytes); + numBytes += UVIntTool.numBytes(numMvccVersionBytes); + numBytes += UVIntTool.numBytes(numValueBytes); + + numBytes += UVIntTool.numBytes(nextNodeOffsetWidth); + numBytes += UVIntTool.numBytes(familyOffsetWidth); + numBytes += UVIntTool.numBytes(qualifierOffsetWidth); + numBytes += UVIntTool.numBytes(timestampIndexWidth); + 
numBytes += UVIntTool.numBytes(mvccVersionIndexWidth); + numBytes += UVIntTool.numBytes(valueOffsetWidth); + numBytes += UVIntTool.numBytes(valueLengthWidth); + + numBytes += UVIntTool.numBytes(rowTreeDepth); + numBytes += UVIntTool.numBytes(maxRowLength); + numBytes += UVIntTool.numBytes(maxQualifierLength); + + numBytes += UVLongTool.numBytes(minTimestamp); + numBytes += UVIntTool.numBytes(timestampDeltaWidth); + numBytes += UVLongTool.numBytes(minMvccVersion); + numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth); + ++numBytes;//os.write(getAllSameTypeByte()); + ++numBytes;//os.write(allTypes); + + numBytes += UVIntTool.numBytes(numUniqueRows); + numBytes += UVIntTool.numBytes(numUniqueFamilies); + numBytes += UVIntTool.numBytes(numUniqueQualifiers); + return numBytes; + } + + public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{ + UVIntTool.writeBytes(version, os); + UVIntTool.writeBytes(numMetaBytes, os); + UVIntTool.writeBytes(numKeyValueBytes, os); + os.write(getIncludesMvccVersionByte()); + + UVIntTool.writeBytes(numRowBytes, os); + UVIntTool.writeBytes(numFamilyBytes, os); + UVIntTool.writeBytes(numQualifierBytes, os); + UVIntTool.writeBytes(numTimestampBytes, os); + UVIntTool.writeBytes(numMvccVersionBytes, os); + UVIntTool.writeBytes(numValueBytes, os); + + UVIntTool.writeBytes(nextNodeOffsetWidth, os); + UVIntTool.writeBytes(familyOffsetWidth, os); + UVIntTool.writeBytes(qualifierOffsetWidth, os); + UVIntTool.writeBytes(timestampIndexWidth, os); + UVIntTool.writeBytes(mvccVersionIndexWidth, os); + UVIntTool.writeBytes(valueOffsetWidth, os); + UVIntTool.writeBytes(valueLengthWidth, os); + + UVIntTool.writeBytes(rowTreeDepth, os); + UVIntTool.writeBytes(maxRowLength, os); + UVIntTool.writeBytes(maxQualifierLength, os); + + UVLongTool.writeBytes(minTimestamp, os); + UVIntTool.writeBytes(timestampDeltaWidth, os); + UVLongTool.writeBytes(minMvccVersion, os); + UVIntTool.writeBytes(mvccVersionDeltaWidth, os); + os.write(getAllSameTypeByte()); + os.write(allTypes); + + UVIntTool.writeBytes(numUniqueRows, os); + UVIntTool.writeBytes(numUniqueFamilies, os); + UVIntTool.writeBytes(numUniqueQualifiers, os); + } + + public void readVariableBytesFromInputStream(InputStream is) throws IOException{ + version = UVIntTool.getInt(is); + numMetaBytes = UVIntTool.getInt(is); + numKeyValueBytes = UVIntTool.getInt(is); + setIncludesMvccVersion((byte) is.read()); + + numRowBytes = UVIntTool.getInt(is); + numFamilyBytes = UVIntTool.getInt(is); + numQualifierBytes = UVIntTool.getInt(is); + numTimestampBytes = UVIntTool.getInt(is); + numMvccVersionBytes = UVIntTool.getInt(is); + numValueBytes = UVIntTool.getInt(is); + + nextNodeOffsetWidth = UVIntTool.getInt(is); + familyOffsetWidth = UVIntTool.getInt(is); + qualifierOffsetWidth = UVIntTool.getInt(is); + timestampIndexWidth = UVIntTool.getInt(is); + mvccVersionIndexWidth = UVIntTool.getInt(is); + valueOffsetWidth = UVIntTool.getInt(is); + valueLengthWidth = UVIntTool.getInt(is); + + rowTreeDepth = UVIntTool.getInt(is); + maxRowLength = UVIntTool.getInt(is); + maxQualifierLength = UVIntTool.getInt(is); + + minTimestamp = UVLongTool.getLong(is); + timestampDeltaWidth = UVIntTool.getInt(is); + minMvccVersion = UVLongTool.getLong(is); + mvccVersionDeltaWidth = UVIntTool.getInt(is); + + setAllSameType((byte) is.read()); + allTypes = (byte) is.read(); + + numUniqueRows = UVIntTool.getInt(is); + numUniqueFamilies = UVIntTool.getInt(is); + numUniqueQualifiers = UVIntTool.getInt(is); + } + + public void 
readVariableBytesFromArray(byte[] bytes, int offset) { + int position = offset; + + version = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(version); + numMetaBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numMetaBytes); + numKeyValueBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numKeyValueBytes); + setIncludesMvccVersion(bytes[position]); + ++position; + + numRowBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numRowBytes); + numFamilyBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numFamilyBytes); + numQualifierBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numQualifierBytes); + numTimestampBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numTimestampBytes); + numMvccVersionBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numMvccVersionBytes); + numValueBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numValueBytes); + + nextNodeOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(nextNodeOffsetWidth); + familyOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(familyOffsetWidth); + qualifierOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(qualifierOffsetWidth); + timestampIndexWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(timestampIndexWidth); + mvccVersionIndexWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(mvccVersionIndexWidth); + valueOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(valueOffsetWidth); + valueLengthWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(valueLengthWidth); + + rowTreeDepth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(rowTreeDepth); + maxRowLength = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(maxRowLength); + maxQualifierLength = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(maxQualifierLength); + + minTimestamp = UVLongTool.getLong(bytes, position); + position += UVLongTool.numBytes(minTimestamp); + timestampDeltaWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(timestampDeltaWidth); + minMvccVersion = UVLongTool.getLong(bytes, position); + position += UVLongTool.numBytes(minMvccVersion); + mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(mvccVersionDeltaWidth); + + setAllSameType(bytes[position]); + ++position; + allTypes = bytes[position]; + ++position; + + numUniqueRows = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numUniqueRows); + numUniqueFamilies = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numUniqueFamilies); + numUniqueQualifiers = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numUniqueQualifiers); + } + + //TODO method that can read directly from ByteBuffer instead of InputStream + + + /*************** methods *************************/ + + public int getKeyValueTypeWidth() { + return allSameType ? 0 : 1; + } + + public byte getIncludesMvccVersionByte() { + return includesMvccVersion ? 
(byte) 1 : (byte) 0; + } + + public void setIncludesMvccVersion(byte includesMvccVersionByte) { + includesMvccVersion = includesMvccVersionByte != 0; + } + + public byte getAllSameTypeByte() { + return allSameType ? (byte) 1 : (byte) 0; + } + + public void setAllSameType(byte allSameTypeByte) { + allSameType = allSameTypeByte != 0; + } + + public boolean isAllSameTimestamp() { + return timestampIndexWidth == 0; + } + + public boolean isAllSameMvccVersion() { + return mvccVersionIndexWidth == 0; + } + + public void setTimestampFields(LongEncoder encoder){ + this.minTimestamp = encoder.getMin(); + this.timestampIndexWidth = encoder.getBytesPerIndex(); + this.timestampDeltaWidth = encoder.getBytesPerDelta(); + this.numTimestampBytes = encoder.getTotalCompressedBytes(); + } + + public void setMvccVersionFields(LongEncoder encoder){ + this.minMvccVersion = encoder.getMin(); + this.mvccVersionIndexWidth = encoder.getBytesPerIndex(); + this.mvccVersionDeltaWidth = encoder.getBytesPerDelta(); + this.numMvccVersionBytes = encoder.getTotalCompressedBytes(); + } + + + /*************** Object methods *************************/ + + /** + * Generated by Eclipse + */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj; + if (allSameType != other.allSameType) + return false; + if (allTypes != other.allTypes) + return false; + if (arrayOffset != other.arrayOffset) + return false; + if (bufferOffset != other.bufferOffset) + return false; + if (valueLengthWidth != other.valueLengthWidth) + return false; + if (valueOffsetWidth != other.valueOffsetWidth) + return false; + if (familyOffsetWidth != other.familyOffsetWidth) + return false; + if (includesMvccVersion != other.includesMvccVersion) + return false; + if (maxQualifierLength != other.maxQualifierLength) + return false; + if (maxRowLength != other.maxRowLength) + return false; + if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth) + return false; + if (mvccVersionIndexWidth != other.mvccVersionIndexWidth) + return false; + if (minMvccVersion != other.minMvccVersion) + return false; + if (minTimestamp != other.minTimestamp) + return false; + if (nextNodeOffsetWidth != other.nextNodeOffsetWidth) + return false; + if (numValueBytes != other.numValueBytes) + return false; + if (numFamilyBytes != other.numFamilyBytes) + return false; + if (numMvccVersionBytes != other.numMvccVersionBytes) + return false; + if (numMetaBytes != other.numMetaBytes) + return false; + if (numQualifierBytes != other.numQualifierBytes) + return false; + if (numRowBytes != other.numRowBytes) + return false; + if (numTimestampBytes != other.numTimestampBytes) + return false; + if (numUniqueFamilies != other.numUniqueFamilies) + return false; + if (numUniqueQualifiers != other.numUniqueQualifiers) + return false; + if (numUniqueRows != other.numUniqueRows) + return false; + if (numKeyValueBytes != other.numKeyValueBytes) + return false; + if (qualifierOffsetWidth != other.qualifierOffsetWidth) + return false; + if (rowTreeDepth != other.rowTreeDepth) + return false; + if (timestampDeltaWidth != other.timestampDeltaWidth) + return false; + if (timestampIndexWidth != other.timestampIndexWidth) + return false; + if (version != other.version) + return false; + return true; + } + + /** + * Generated by Eclipse + */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * 
result + (allSameType ? 1231 : 1237); + result = prime * result + allTypes; + result = prime * result + arrayOffset; + result = prime * result + bufferOffset; + result = prime * result + valueLengthWidth; + result = prime * result + valueOffsetWidth; + result = prime * result + familyOffsetWidth; + result = prime * result + (includesMvccVersion ? 1231 : 1237); + result = prime * result + maxQualifierLength; + result = prime * result + maxRowLength; + result = prime * result + mvccVersionDeltaWidth; + result = prime * result + mvccVersionIndexWidth; + result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32)); + result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32)); + result = prime * result + nextNodeOffsetWidth; + result = prime * result + numValueBytes; + result = prime * result + numFamilyBytes; + result = prime * result + numMvccVersionBytes; + result = prime * result + numMetaBytes; + result = prime * result + numQualifierBytes; + result = prime * result + numRowBytes; + result = prime * result + numTimestampBytes; + result = prime * result + numUniqueFamilies; + result = prime * result + numUniqueQualifiers; + result = prime * result + numUniqueRows; + result = prime * result + numKeyValueBytes; + result = prime * result + qualifierOffsetWidth; + result = prime * result + rowTreeDepth; + result = prime * result + timestampDeltaWidth; + result = prime * result + timestampIndexWidth; + result = prime * result + version; + return result; + } + + /** + * Generated by Eclipse + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("PtBlockMeta [arrayOffset="); + builder.append(arrayOffset); + builder.append(", bufferOffset="); + builder.append(bufferOffset); + builder.append(", version="); + builder.append(version); + builder.append(", numMetaBytes="); + builder.append(numMetaBytes); + builder.append(", numKeyValueBytes="); + builder.append(numKeyValueBytes); + builder.append(", includesMvccVersion="); + builder.append(includesMvccVersion); + builder.append(", numRowBytes="); + builder.append(numRowBytes); + builder.append(", numFamilyBytes="); + builder.append(numFamilyBytes); + builder.append(", numQualifierBytes="); + builder.append(numQualifierBytes); + builder.append(", numTimestampBytes="); + builder.append(numTimestampBytes); + builder.append(", numMvccVersionBytes="); + builder.append(numMvccVersionBytes); + builder.append(", numValueBytes="); + builder.append(numValueBytes); + builder.append(", nextNodeOffsetWidth="); + builder.append(nextNodeOffsetWidth); + builder.append(", familyOffsetWidth="); + builder.append(familyOffsetWidth); + builder.append(", qualifierOffsetWidth="); + builder.append(qualifierOffsetWidth); + builder.append(", timestampIndexWidth="); + builder.append(timestampIndexWidth); + builder.append(", mvccVersionIndexWidth="); + builder.append(mvccVersionIndexWidth); + builder.append(", valueOffsetWidth="); + builder.append(valueOffsetWidth); + builder.append(", valueLengthWidth="); + builder.append(valueLengthWidth); + builder.append(", rowTreeDepth="); + builder.append(rowTreeDepth); + builder.append(", maxRowLength="); + builder.append(maxRowLength); + builder.append(", maxQualifierLength="); + builder.append(maxQualifierLength); + builder.append(", minTimestamp="); + builder.append(minTimestamp); + builder.append(", timestampDeltaWidth="); + builder.append(timestampDeltaWidth); + builder.append(", minMvccVersion="); + builder.append(minMvccVersion); + builder.append(", 
mvccVersionDeltaWidth="); + builder.append(mvccVersionDeltaWidth); + builder.append(", allSameType="); + builder.append(allSameType); + builder.append(", allTypes="); + builder.append(allTypes); + builder.append(", numUniqueRows="); + builder.append(numUniqueRows); + builder.append(", numUniqueFamilies="); + builder.append(numUniqueFamilies); + builder.append(", numUniqueQualifiers="); + builder.append(numUniqueQualifiers); + builder.append("]"); + return builder.toString(); + } + + + /************** absolute getters *******************/ + + public int getAbsoluteMetaOffset() { + return arrayOffset + bufferOffset; + } + + public int getAbsoluteRowOffset() { + return getAbsoluteMetaOffset() + numMetaBytes; + } + + public int getAbsoluteFamilyOffset() { + return getAbsoluteRowOffset() + numRowBytes; + } + + public int getAbsoluteQualifierOffset() { + return getAbsoluteFamilyOffset() + numFamilyBytes; + } + + public int getAbsoluteTimestampOffset() { + return getAbsoluteQualifierOffset() + numQualifierBytes; + } + + public int getAbsoluteMvccVersionOffset() { + return getAbsoluteTimestampOffset() + numTimestampBytes; + } + + public int getAbsoluteValueOffset() { + return getAbsoluteMvccVersionOffset() + numMvccVersionBytes; + } + + + /*************** get/set ***************************/ + + public int getTimestampDeltaWidth() { + return timestampDeltaWidth; + } + + public void setTimestampDeltaWidth(int timestampDeltaWidth) { + this.timestampDeltaWidth = timestampDeltaWidth; + } + + public int getValueOffsetWidth() { + return valueOffsetWidth; + } + + public void setValueOffsetWidth(int dataOffsetWidth) { + this.valueOffsetWidth = dataOffsetWidth; + } + + public int getValueLengthWidth() { + return valueLengthWidth; + } + + public void setValueLengthWidth(int dataLengthWidth) { + this.valueLengthWidth = dataLengthWidth; + } + + public int getMaxRowLength() { + return maxRowLength; + } + + public void setMaxRowLength(int maxRowLength) { + this.maxRowLength = maxRowLength; + } + + public long getMinTimestamp() { + return minTimestamp; + } + + public void setMinTimestamp(long minTimestamp) { + this.minTimestamp = minTimestamp; + } + + public byte getAllTypes() { + return allTypes; + } + + public void setAllTypes(byte allTypes) { + this.allTypes = allTypes; + } + + public boolean isAllSameType() { + return allSameType; + } + + public void setAllSameType(boolean allSameType) { + this.allSameType = allSameType; + } + + public int getNextNodeOffsetWidth() { + return nextNodeOffsetWidth; + } + + public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) { + this.nextNodeOffsetWidth = nextNodeOffsetWidth; + } + + public int getNumRowBytes() { + return numRowBytes; + } + + public void setNumRowBytes(int numRowBytes) { + this.numRowBytes = numRowBytes; + } + + public int getNumTimestampBytes() { + return numTimestampBytes; + } + + public void setNumTimestampBytes(int numTimestampBytes) { + this.numTimestampBytes = numTimestampBytes; + } + + public int getNumValueBytes() { + return numValueBytes; + } + + public void setNumValueBytes(int numValueBytes) { + this.numValueBytes = numValueBytes; + } + + public int getNumMetaBytes() { + return numMetaBytes; + } + + public void setNumMetaBytes(int numMetaBytes) { + this.numMetaBytes = numMetaBytes; + } + + public int getArrayOffset() { + return arrayOffset; + } + + public void setArrayOffset(int arrayOffset) { + this.arrayOffset = arrayOffset; + } + + public int getBufferOffset() { + return bufferOffset; + } + + public void setBufferOffset(int bufferOffset) { 
+ this.bufferOffset = bufferOffset; + } + + public int getNumKeyValueBytes() { + return numKeyValueBytes; + } + + public void setNumKeyValueBytes(int numKeyValueBytes) { + this.numKeyValueBytes = numKeyValueBytes; + } + + public int getRowTreeDepth() { + return rowTreeDepth; + } + + public void setRowTreeDepth(int rowTreeDepth) { + this.rowTreeDepth = rowTreeDepth; + } + + public int getNumMvccVersionBytes() { + return numMvccVersionBytes; + } + + public void setNumMvccVersionBytes(int numMvccVersionBytes) { + this.numMvccVersionBytes = numMvccVersionBytes; + } + + public int getMvccVersionDeltaWidth() { + return mvccVersionDeltaWidth; + } + + public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) { + this.mvccVersionDeltaWidth = mvccVersionDeltaWidth; + } + + public long getMinMvccVersion() { + return minMvccVersion; + } + + public void setMinMvccVersion(long minMvccVersion) { + this.minMvccVersion = minMvccVersion; + } + + public int getNumFamilyBytes() { + return numFamilyBytes; + } + + public void setNumFamilyBytes(int numFamilyBytes) { + this.numFamilyBytes = numFamilyBytes; + } + + public int getFamilyOffsetWidth() { + return familyOffsetWidth; + } + + public void setFamilyOffsetWidth(int familyOffsetWidth) { + this.familyOffsetWidth = familyOffsetWidth; + } + + public int getNumUniqueRows() { + return numUniqueRows; + } + + public void setNumUniqueRows(int numUniqueRows) { + this.numUniqueRows = numUniqueRows; + } + + public int getNumUniqueFamilies() { + return numUniqueFamilies; + } + + public void setNumUniqueFamilies(int numUniqueFamilies) { + this.numUniqueFamilies = numUniqueFamilies; + } + + public int getNumUniqueQualifiers() { + return numUniqueQualifiers; + } + + public void setNumUniqueQualifiers(int numUniqueQualifiers) { + this.numUniqueQualifiers = numUniqueQualifiers; + } + + public int getNumQualifierBytes() { + return numQualifierBytes; + } + + public void setNumQualifierBytes(int numQualifierBytes) { + this.numQualifierBytes = numQualifierBytes; + } + + public int getQualifierOffsetWidth() { + return qualifierOffsetWidth; + } + + public void setQualifierOffsetWidth(int qualifierOffsetWidth) { + this.qualifierOffsetWidth = qualifierOffsetWidth; + } + + public int getMaxQualifierLength() { + return maxQualifierLength; + } + + public void setMaxQualifierLength(int maxQualifierLength) { + this.maxQualifierLength = maxQualifierLength; + } + + public int getTimestampIndexWidth() { + return timestampIndexWidth; + } + + public void setTimestampIndexWidth(int timestampIndexWidth) { + this.timestampIndexWidth = timestampIndexWidth; + } + + public int getMvccVersionIndexWidth() { + return mvccVersionIndexWidth; + } + + public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) { + this.mvccVersionIndexWidth = mvccVersionIndexWidth; + } + + public int getVersion() { + return version; + } + + public void setVersion(int version) { + this.version = version; + } + + public boolean isIncludesMvccVersion() { + return includesMvccVersion; + } + + public void setIncludesMvccVersion(boolean includesMvccVersion) { + this.includesMvccVersion = includesMvccVersion; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java new file mode 100644 index 0000000..2aa5a2b --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java @@ -0,0 +1,209 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KeyComparator; +import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator; +import org.apache.hadoop.hbase.KeyValue.RootKeyComparator; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory; +import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; +import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory; +import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; +import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.io.RawComparator; + +/** + * This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or + * package changes. + *

+ * PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point + * for PrefixTree encoding and decoding. Encoding is delegated to instances of + * {@link PrefixTreeEncoder}, and decoding is delegated to instances of + * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder instances are + * created and recycled by static PtEncoderFactory and PtDecoderFactory. + */ +@InterfaceAudience.Private +public class PrefixTreeCodec implements DataBlockEncoder{ + + /** + * no-arg constructor for reflection + */ + public PrefixTreeCodec() { + } + + /** + * Copied from BufferedDataBlockEncoder. Almost definitely can be improved, but i'm not familiar + * enough with the concept of the HFileBlockEncodingContext. + */ + @Override + public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion, + HFileBlockEncodingContext blkEncodingCtx) throws IOException { + if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) { + throw new IOException(this.getClass().getName() + " only accepts " + + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context."); + } + + HFileBlockDefaultEncodingContext encodingCtx + = (HFileBlockDefaultEncodingContext) blkEncodingCtx; + encodingCtx.prepareEncoding(); + DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder(); + internalEncodeKeyValues(dataOut, in, includesMvccVersion); + + //do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE? + if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) { + encodingCtx.postEncoding(BlockType.ENCODED_DATA); + } else { + encodingCtx.postEncoding(BlockType.DATA); + } + } + + private void internalEncodeKeyValues(DataOutputStream encodedOutputStream, + ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException { + rawKeyValues.rewind(); + PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion); + + try{ + KeyValue kv; + while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) { + builder.write(kv); + } + builder.flush(); + }finally{ + EncoderFactory.checkIn(builder); + } + } + + + @Override + public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion) + throws IOException { + return decodeKeyValues(source, 0, 0, includesMvccVersion); + } + + + /** + * I don't think this method is called during normal HBase operation, so efficiency is not + * important. + */ + @Override + public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength, + int skipLastBytes, boolean includesMvccVersion) throws IOException { + ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste + sourceAsBuffer.mark(); + PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer); + sourceAsBuffer.rewind(); + int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes(); + byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader]; + ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader); + result.rewind(); + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion); + while (searcher.advance()) { + KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current()); + // needs to be modified for DirectByteBuffers. 
no existing methods to + // write VLongs to byte[] + int offset = result.arrayOffset() + result.position(); + KeyValueUtil.appendToByteArray(currentCell, result.array(), offset); + int keyValueLength = KeyValueUtil.length(currentCell); + ByteBufferUtils.skip(result, keyValueLength); + offset += keyValueLength; + if (includesMvccVersion) { + ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion()); + } + } + result.position(result.limit());//make it appear as if we were appending + return result; + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Override + public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { + block.rewind(); + PrefixTreeArraySearcher searcher = null; + try { + //should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will + searcher = DecoderFactory.checkOut(block, true); + if (!searcher.positionAtFirstCell()) { + return null; + } + return KeyValueUtil.copyKeyToNewByteBuffer(searcher.current()); + } finally { + DecoderFactory.checkIn(searcher); + } + } + + @Override + public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm, + DataBlockEncoding encoding, byte[] header) { + if(DataBlockEncoding.PREFIX_TREE != encoding){ + //i'm not sure why encoding is in the interface. Each encoder implementation should probably + //know it's encoding type + throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported"); + } + return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header); + } + + @Override + public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) { + return new HFileBlockDefaultDecodingContext(compressionAlgorithm); + } + + /** + * Is this the correct handling of an illegal comparator? How to prevent that from getting all + * the way to this point. + */ + @Override + public EncodedSeeker createSeeker(RawComparator comparator, boolean includesMvccVersion) { + if(! (comparator instanceof KeyComparator)){ + throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator"); + } + if(comparator instanceof MetaKeyComparator){ + throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META " + +"table"); + } + if(comparator instanceof RootKeyComparator){ + throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT " + +"table"); + } + + return new PrefixTreeSeeker(includesMvccVersion); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java new file mode 100644 index 0000000..a46a34a --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory; +import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker; + +/** + * These methods have the same definition as any implementation of the EncodedSeeker. + * + * In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It + * currently returns a new KeyValue object each time getKeyValue is called. This is not horrible, + * but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in + * the data from the PrefixTreeCell. It is somewhat heavyweight right now. + */ +@InterfaceAudience.Private +public class PrefixTreeSeeker implements EncodedSeeker { + + protected ByteBuffer block; + protected boolean includeMvccVersion; + protected PrefixTreeArraySearcher ptSearcher; + + public PrefixTreeSeeker(boolean includeMvccVersion) { + this.includeMvccVersion = includeMvccVersion; + } + + @Override + public void setCurrentBuffer(ByteBuffer fullBlockBuffer) { + block = fullBlockBuffer; + ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion); + rewind(); + } + + /** + * Currently unused. + *

+ * TODO performance leak. should reuse the searchers. hbase does not currently have a hook where + * this can be called + */ + public void releaseCurrentSearcher(){ + DecoderFactory.checkIn(ptSearcher); + } + + + @Override + public ByteBuffer getKeyDeepCopy() { + return KeyValueUtil.copyKeyToNewByteBuffer(ptSearcher.current()); + } + + + @Override + public ByteBuffer getValueShallowCopy() { + return CellUtil.getValueBufferShallowCopy(ptSearcher.current()); + } + + /** + * currently must do deep copy into new array + */ + @Override + public ByteBuffer getKeyValueBuffer() { + return KeyValueUtil.copyToNewByteBuffer(ptSearcher.current()); + } + + /** + * currently must do deep copy into new array + */ + @Override + public KeyValue getKeyValue() { + return KeyValueUtil.copyToNewKeyValue(ptSearcher.current()); + } + + /** + * Currently unused. + *

+ * A nice, lightweight reference, though the underlying cell is transient. This method may return + * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may + * return a different reference for each Cell. + *

+ * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to + * use this method instead of the getKeyValue() methods above. + */ + public Cell get() { + return ptSearcher.current(); + } + + @Override + public void rewind() { + ptSearcher.positionAtFirstCell(); + } + + @Override + public boolean next() { + return ptSearcher.advance(); + } + +// @Override + public boolean advance() { + return ptSearcher.advance(); + } + + + private static final boolean USE_POSITION_BEFORE = false; + + /** + * Seek forward only (should be called reseekToKeyInBlock?). + *

+ * If the exact key is found, look at the seekBefore variable and:
+ * - if true: go to the previous key
+ * - if false: stay on the exact key + *

+ * If the exact key is not found, then go to the previous key *if possible*, but remember to + * leave the scanner in a valid state. + *

+ * @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker + * @param offset offset into the keyOnlyBytes array + * @param length number of bytes of the keyOnlyBytes array to use + * @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up 1 Cell + * @return 0 if the seeker is on the exact key
+ * 1 if the seeker is not on the key for any reason, including seekBefore being true + */ + @Override + public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length, + boolean forceBeforeOnExactMatch) { + if (USE_POSITION_BEFORE) { + return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length, + forceBeforeOnExactMatch); + }else{ + return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length, + forceBeforeOnExactMatch); + } + } + + + + /* + * Support both of these options since the underlying PrefixTree supports both. Possibly + * expand the EncodedSeeker to utilize them both. + */ + + protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length, + boolean forceBeforeOnExactMatch){ + // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell + KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); + + CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv); + + if(CellScannerPosition.AT == position){ + if (forceBeforeOnExactMatch) { + ptSearcher.previous(); + return 1; + } + return 0; + } + + return 1; + } + + + protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length, + boolean forceBeforeOnExactMatch){ + // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell + KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); + + //should probably switch this to use the seekForwardToOrBefore method + CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv); + + if(CellScannerPosition.AT == position){ + if (forceBeforeOnExactMatch) { + ptSearcher.previous(); + return 1; + } + return 0; + + } + + if(CellScannerPosition.AFTER == position){ + if(!ptSearcher.isBeforeFirst()){ + ptSearcher.previous(); + } + return 1; + } + + if(position == CellScannerPosition.AFTER_LAST){ + return 1; + } + + throw new RuntimeException("unexpected CellScannerPosition:"+position); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/ArraySearcherPool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/ArraySearcherPool.java new file mode 100644 index 0000000..e1a92e0 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/ArraySearcherPool.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode; + +import java.nio.ByteBuffer; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Pools PrefixTreeArraySearcher objects. 
Each Searcher can consist of hundreds or thousands of + objects, and one is needed for each HFile during a Get operation. With tens of thousands of + Gets/second, reusing these searchers may save a lot of young gen collections. + *

+ * Alternative implementation would be a ByteBufferSearcherPool (not implemented yet). + */ +@InterfaceAudience.Private +public class ArraySearcherPool { + + /** + * One decoder is needed for each storefile for each Get operation so we may need hundreds at the + * same time, however, decoding is a CPU bound activity so should limit this to something in the + * realm of maximum reasonable active threads. + */ + private static final Integer MAX_POOL_SIZE = 1000; + + protected Queue pool + = new LinkedBlockingQueue(MAX_POOL_SIZE); + + public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) { + PrefixTreeArraySearcher searcher = pool.poll();//will return null if pool is empty + searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion); + return searcher; + } + + public void checkIn(PrefixTreeArraySearcher searcher) { + searcher.releaseBlockReference(); + pool.offer(searcher); + } + + @Override + public String toString() { + return ("poolSize:" + pool.size()); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java new file mode 100644 index 0000000..90c22b7 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; + +/** + * Static wrapper class for the ArraySearcherPool. + */ +@InterfaceAudience.Private +public class DecoderFactory { + + private static final ArraySearcherPool POOL = new ArraySearcherPool(); + + //TODO will need a PrefixTreeSearcher on top of CellSearcher + public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer, + boolean includeMvccVersion) { + if (buffer.isDirect()) { + throw new IllegalArgumentException("DirectByteBuffers not supported yet"); + // TODO implement PtByteBufferBlockScanner + } + + PrefixTreeArraySearcher searcher = POOL.checkOut(buffer, + includeMvccVersion); + return searcher; + } + + public static void checkIn(CellSearcher pSearcher) { + if (pSearcher == null) { + return; + } + if (! 
(pSearcher instanceof PrefixTreeArraySearcher)) { + throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to " + +DecoderFactory.class); + } + PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher; + POOL.checkIn(searcher); + } + + + /**************************** helper ******************************/ + + public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer, + PrefixTreeArraySearcher searcher, boolean includeMvccVersion) { + if (searcher == null) { + PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer); + searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(), + blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength()); + searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); + return searcher; + } + + PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta(); + blockMeta.initOnBlock(buffer); + if (!searcher.areBuffersBigEnough()) { + int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(), + searcher.getMaxRowTreeStackNodes()); + int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength()); + int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(), + searcher.getQualifierBufferLength()); + searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength, + qualifierBufferLength); + } + //this is where we parse the BlockMeta + searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); + return searcher; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java new file mode 100644 index 0000000..5e1f48b --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.ReversibleCellScanner; + +/** + * Methods for going backwards through a PrefixTree block. This class is split out on its own to + * simplify the Scanner superclass and Searcher subclass. 
+ */ +@InterfaceAudience.Private +public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements + ReversibleCellScanner { + + /***************** construct ******************************/ + + public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength) { + super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); + } + + + /***************** Object methods ***************************/ + + @Override + public boolean equals(Object obj) { + //trivial override to confirm intent (findbugs) + return super.equals(obj); + } + + + /***************** methods **********************************/ + + @Override + public boolean previous() { + if (afterLast) { + afterLast = false; + positionAtLastCell(); + return true; + } + if (beforeFirst) { + return false; + } + if (isFirstCellInRow()) { + previousRowInternal(); + if (beforeFirst) { + return false; + } + populateLastNonRowFields(); + return true; + } + populatePreviousNonRowFields(); + return true; + } + + @Override + public boolean previousRow(boolean endOfRow) { + previousRowInternal(); + if(beforeFirst){ + return false; + } + if(endOfRow){ + populateLastNonRowFields(); + }else{ + populateFirstNonRowFields(); + } + return true; + } + + private boolean previousRowInternal() { + if (beforeFirst) { + return false; + } + if (afterLast) { + positionAtLastRow(); + return true; + } + if (currentRowNode.hasOccurrences()) { + discardCurrentRowNode(false); + if(currentRowNode==null){ + return false; + } + } + while (!beforeFirst) { + if (isDirectlyAfterNub()) {//we are about to back up to the nub + currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf + nubCellsRemain = true;//this positions us on the nub + return true; + } + if (currentRowNode.hasPreviousFanNodes()) { + followPreviousFan(); + descendToLastRowFromCurrentPosition(); + } else {// keep going up the stack until we find previous fan positions + discardCurrentRowNode(false); + if(currentRowNode==null){ + return false; + } + } + if (currentRowNode.hasOccurrences()) {// escape clause + return true;// found some values + } + } + return false;// went past the beginning + } + + protected boolean isDirectlyAfterNub() { + return currentRowNode.isNub() && currentRowNode.getFanIndex()==0; + } + + protected void positionAtLastRow() { + reInitFirstNode(); + descendToLastRowFromCurrentPosition(); + } + + protected void descendToLastRowFromCurrentPosition() { + while (currentRowNode.hasChildren()) { + followLastFan(); + } + } + + protected void positionAtLastCell() { + positionAtLastRow(); + populateLastNonRowFields(); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java new file mode 100644 index 0000000..6cb670f --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java @@ -0,0 +1,506 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellScanner; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader; +import org.apache.hadoop.hbase.codec.prefixtree.decode.row.RowNodeReader; +import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder; +import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; + +/** + * Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and + * call get/set methods. + * + * This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This + * implementation requires that the bytes be in a normal java byte[] for performance. The + * alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer + * without copying the whole buffer on-heap. + */ +@InterfaceAudience.Private +public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner { + + /***************** fields ********************************/ + + protected PrefixTreeBlockMeta blockMeta; + + protected boolean beforeFirst; + protected boolean afterLast; + + protected RowNodeReader[] rowNodes; + protected int rowNodeStackIndex; + + protected RowNodeReader currentRowNode; + protected ColumnReader familyReader; + protected ColumnReader qualifierReader; + protected TimestampDecoder timestampDecoder; + protected MvccVersionDecoder mvccVersionDecoder; + + protected boolean nubCellsRemain; + protected int currentCellIndex; + + + /*********************** construct ******************************/ + + // pass in blockMeta so we can initialize buffers big enough for all cells in the block + public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength) { + this.rowNodes = new RowNodeReader[rowTreeDepth]; + for (int i = 0; i < rowNodes.length; ++i) { + rowNodes[i] = new RowNodeReader(); + } + this.rowBuffer = new byte[rowBufferLength]; + this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH]; + this.familyReader = new ColumnReader(familyBuffer, true); + this.qualifierBuffer = new byte[qualifierBufferLength]; + this.qualifierReader = new ColumnReader(qualifierBuffer, false); + this.timestampDecoder = new TimestampDecoder(); + this.mvccVersionDecoder = new MvccVersionDecoder(); + } + + + /**************** init helpers ***************************************/ + + /** + * Call when first accessing a block. 
+ * @return entirely new scanner if false + */ + public boolean areBuffersBigEnough() { + if (rowNodes.length < blockMeta.getRowTreeDepth()) { + return false; + } + if (rowBuffer.length < blockMeta.getMaxRowLength()) { + return false; + } + if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) { + return false; + } + return true; + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, + boolean includeMvccVersion) { + this.block = block; + this.blockMeta = blockMeta; + this.familyOffset = familyBuffer.length; + this.familyReader.initOnBlock(blockMeta, block); + this.qualifierOffset = qualifierBuffer.length; + this.qualifierReader.initOnBlock(blockMeta, block); + this.timestampDecoder.initOnBlock(blockMeta, block); + this.mvccVersionDecoder.initOnBlock(blockMeta, block); + this.includeMvccVersion = includeMvccVersion; + resetToBeforeFirstEntry(); + } + + // Does this have to be in the CellScanner Interface? TODO + public void resetToBeforeFirstEntry() { + beforeFirst = true; + afterLast = false; + rowNodeStackIndex = -1; + currentRowNode = null; + rowLength = 0; + familyOffset = familyBuffer.length; + familyLength = 0; + qualifierOffset = blockMeta.getMaxQualifierLength(); + qualifierLength = 0; + nubCellsRemain = false; + currentCellIndex = -1; + timestamp = -1L; + type = DEFAULT_TYPE; + absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized + valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length + } + + /** + * Call this before putting the scanner back into a pool so it doesn't hold the last used block + * in memory. + */ + public void releaseBlockReference(){ + block = null; + } + + + /********************** CellScanner **********************/ + + @Override + public Cell current() { + if(isOutOfBounds()){ + return null; + } + return (Cell)this; + } + + /******************* Object methods ************************/ + + @Override + public boolean equals(Object obj) { + //trivial override to confirm intent (findbugs) + return super.equals(obj); + } + + @Override + public int hashCode() { + return super.hashCode(); + } + + /** + * Override PrefixTreeCell.toString() with a check to see if the current cell is valid. 
+ */ + @Override + public String toString() { + Cell currentCell = current(); + if(currentCell==null){ + return "null"; + } + return ((PrefixTreeCell)currentCell).getKeyValueString(); + } + + + /******************* advance ***************************/ + + public boolean positionAtFirstCell() { + reInitFirstNode(); + return advance(); + } + + @Override + public boolean advance() { + if (afterLast) { + return false; + } + if (!hasOccurrences()) { + resetToBeforeFirstEntry(); + } + if (beforeFirst || isLastCellInRow()) { + nextRow(); + if (afterLast) { + return false; + } + } else { + ++currentCellIndex; + } + + populateNonRowFields(currentCellIndex); + return true; + } + + + public boolean nextRow() { + nextRowInternal(); + if (afterLast) { + return false; + } + populateNonRowFields(currentCellIndex); + return true; + } + + + /** + * This method is safe to call when the scanner is not on a fully valid row node, as in the case + * of a row token miss in the Searcher + * @return true if we are positioned on a valid row, false if past end of block + */ + protected boolean nextRowInternal() { + if (afterLast) { + return false; + } + if (beforeFirst) { + initFirstNode(); + if (currentRowNode.hasOccurrences()) { + if (currentRowNode.isNub()) { + nubCellsRemain = true; + } + currentCellIndex = 0; + return true; + } + } + if (currentRowNode.isLeaf()) { + discardCurrentRowNode(true); + } + while (!afterLast) { + if (nubCellsRemain) { + nubCellsRemain = false; + } + if (currentRowNode.hasMoreFanNodes()) { + followNextFan(); + if (currentRowNode.hasOccurrences()) { + currentCellIndex = 0; + return true; + }// found some values + } else { + discardCurrentRowNode(true); + } + } + return false;// went past the end + } + + + /**************** secondary traversal methods ******************************/ + + protected void reInitFirstNode() { + resetToBeforeFirstEntry(); + initFirstNode(); + } + + protected void initFirstNode() { + int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset(); + rowNodeStackIndex = 0; + currentRowNode = rowNodes[0]; + currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure); + appendCurrentTokenToRowBuffer(); + beforeFirst = false; + } + + protected void followFirstFan() { + followFan(0); + } + + protected void followPreviousFan() { + int nextFanPosition = currentRowNode.getFanIndex() - 1; + followFan(nextFanPosition); + } + + protected void followCurrentFan() { + int currentFanPosition = currentRowNode.getFanIndex(); + followFan(currentFanPosition); + } + + protected void followNextFan() { + int nextFanPosition = currentRowNode.getFanIndex() + 1; + followFan(nextFanPosition); + } + + protected void followLastFan() { + followFan(currentRowNode.getLastFanIndex()); + } + + protected void followFan(int fanIndex) { + currentRowNode.setFanIndex(fanIndex); + appendToRowBuffer(currentRowNode.getFanByte(fanIndex)); + + int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset() + + currentRowNode.getNextNodeOffset(fanIndex, blockMeta); + ++rowNodeStackIndex; + + currentRowNode = rowNodes[rowNodeStackIndex]; + currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure); + + //TODO getToken is spewing garbage + appendCurrentTokenToRowBuffer(); + if (currentRowNode.isNub()) { + nubCellsRemain = true; + } + currentCellIndex = 0; + } + + /** + * @param forwards which marker to set if we overflow + */ + protected void discardCurrentRowNode(boolean forwards) { + RowNodeReader rowNodeBeingPopped = currentRowNode; + --rowNodeStackIndex;// 
pop it off the stack + if (rowNodeStackIndex < 0) { + currentRowNode = null; + if (forwards) { + markAfterLast(); + } else { + markBeforeFirst(); + } + return; + } + popFromRowBuffer(rowNodeBeingPopped); + currentRowNode = rowNodes[rowNodeStackIndex]; + } + + protected void markBeforeFirst() { + beforeFirst = true; + afterLast = false; + currentRowNode = null; + } + + protected void markAfterLast() { + beforeFirst = false; + afterLast = true; + currentRowNode = null; + } + + + /***************** helper methods **************************/ + + protected void appendCurrentTokenToRowBuffer() { + System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength, + currentRowNode.getTokenLength()); + rowLength += currentRowNode.getTokenLength(); + } + + protected void appendToRowBuffer(byte b) { + rowBuffer[rowLength] = b; + ++rowLength; + } + + protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) { + rowLength -= rowNodeBeingPopped.getTokenLength(); + --rowLength; // pop the parent's fan byte + } + + protected boolean hasOccurrences() { + return currentRowNode != null && currentRowNode.hasOccurrences(); + } + + protected boolean isBranch() { + return currentRowNode != null && !currentRowNode.hasOccurrences() + && currentRowNode.hasChildren(); + } + + protected boolean isNub() { + return currentRowNode != null && currentRowNode.hasOccurrences() + && currentRowNode.hasChildren(); + } + + protected boolean isLeaf() { + return currentRowNode != null && currentRowNode.hasOccurrences() + && !currentRowNode.hasChildren(); + } + + //TODO expose this in a PrefixTreeScanner interface + public boolean isBeforeFirst(){ + return beforeFirst; + } + + public boolean isAfterLast(){ + return afterLast; + } + + protected boolean isOutOfBounds(){ + return beforeFirst || afterLast; + } + + protected boolean isFirstCellInRow() { + return currentCellIndex == 0; + } + + protected boolean isLastCellInRow() { + return currentCellIndex == currentRowNode.getLastCellIndex(); + } + + + /********************* fill in family/qualifier/ts/type/value ************/ + + protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) { + populateNonRowFields(cellNum); + return CellComparator.compareStatic(this, key); + } + + protected void populateFirstNonRowFields() { + populateNonRowFields(0); + } + + protected void populatePreviousNonRowFields() { + populateNonRowFields(currentCellIndex - 1); + } + + protected void populateLastNonRowFields() { + populateNonRowFields(currentRowNode.getLastCellIndex()); + } + + protected void populateNonRowFields(int cellIndex) { + currentCellIndex = cellIndex; + populateFamily(); + populateQualifier(); + populateTimestamp(); + populateMvccVersion(); + populateType(); + populateValueOffsets(); + } + + protected void populateFamily() { + int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta); + familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset(); + familyLength = familyReader.getColumnLength(); + } + + protected void populateQualifier() { + int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta); + qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset(); + qualifierLength = qualifierReader.getColumnLength(); + } + + protected void populateTimestamp() { + if (blockMeta.isAllSameTimestamp()) { + timestamp = blockMeta.getMinTimestamp(); + } else { + int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta); + timestamp = 
timestampDecoder.getLong(timestampIndex); + } + } + + protected void populateMvccVersion() { + if (blockMeta.isAllSameMvccVersion()) { + mvccVersion = blockMeta.getMinMvccVersion(); + } else { + int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex, + blockMeta); + mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex); + } + } + + protected void populateType() { + int typeInt; + if (blockMeta.isAllSameType()) { + typeInt = blockMeta.getAllTypes(); + } else { + typeInt = currentRowNode.getType(currentCellIndex, blockMeta); + } + type = PrefixTreeCell.TYPES[typeInt]; + } + + protected void populateValueOffsets() { + int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta); + absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection; + valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta); + } + + + /**************** getters ***************************/ + + public byte[] getTreeBytes() { + return block; + } + + public PrefixTreeBlockMeta getBlockMeta() { + return blockMeta; + } + + public int getMaxRowTreeStackNodes() { + return rowNodes.length; + } + + public int getRowBufferLength() { + return rowBuffer.length; + } + + public int getQualifierBufferLength() { + return qualifierBuffer.length; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java new file mode 100644 index 0000000..097a997 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.primitives.UnsignedBytes; + +/** + * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to + * position itself on a requested Cell without scanning through cells before it. The PrefixTree is + * set up to be a Trie of rows, so finding a particular row is extremely cheap. + *

+ * Once it finds the row, it does a binary search through the cells inside the row, which is not as + * fast as the trie search, but faster than iterating through every cell like existing block + * formats + * do. For this reason, this implementation is targeted towards schemas where rows are narrow + * enough + * to have several or many per block, and where you are generally looking for the entire row or + * the + * first cell. It will still be fast for wide rows or point queries, but could be improved upon. + */ +@InterfaceAudience.Private +public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements + CellSearcher { + + /*************** construct ******************************/ + + public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength) { + super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); + } + + + /********************* CellSearcher methods *******************/ + + @Override + public boolean positionAt(Cell key) { + return CellScannerPosition.AT == positionAtOrAfter(key); + } + + @Override + public CellScannerPosition positionAtOrBefore(Cell key) { + reInitFirstNode(); + int fanIndex = -1; + + while(true){ + //detect row mismatch. break loop if mismatch + int currentNodeDepth = rowLength; + int rowTokenComparison = compareToCurrentToken(key); + if(rowTokenComparison != 0){ + return fixRowTokenMissReverse(rowTokenComparison); + } + + //exact row found, move on to qualifier & ts + if(rowMatchesAfterCurrentPosition(key)){ + return positionAtQualifierTimestamp(key, true); + } + + //detect dead end (no fan to descend into) + if(!currentRowNode.hasFan()){ + if(hasOccurrences()){//must be leaf or nub + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + }else{ + //TODO i don't think this case is exercised by any tests + return fixRowFanMissReverse(0); + } + } + + //keep hunting for the rest of the row + byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth); + fanIndex = currentRowNode.whichFanNode(searchForByte); + if(fanIndex < 0){//no matching row. return early + int insertionPoint = -fanIndex; + return fixRowFanMissReverse(insertionPoint); + } + //found a match, so dig deeper into the tree + followFan(fanIndex); + } + } + + /** + * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra + * if-statements. Priority on readability and debugability. + */ + @Override + public CellScannerPosition positionAtOrAfter(Cell key) { + reInitFirstNode(); + int fanIndex = -1; + + while(true){ + //detect row mismatch. break loop if mismatch + int currentNodeDepth = rowLength; + int rowTokenComparison = compareToCurrentToken(key); + if(rowTokenComparison != 0){ + return fixRowTokenMissForward(rowTokenComparison); + } + + //exact row found, move on to qualifier & ts + if(rowMatchesAfterCurrentPosition(key)){ + return positionAtQualifierTimestamp(key, false); + } + + //detect dead end (no fan to descend into) + if(!currentRowNode.hasFan()){ + if(hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + }else{ + //TODO i don't think this case is exercised by any tests + return fixRowFanMissForward(0); + } + } + + //keep hunting for the rest of the row + byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth); + fanIndex = currentRowNode.whichFanNode(searchForByte); + if(fanIndex < 0){//no matching row. 
return early + int insertionPoint = -fanIndex; + return fixRowFanMissForward(insertionPoint); + } + //found a match, so dig deeper into the tree + followFan(fanIndex); + } + } + + @Override + public boolean seekForwardTo(Cell key) { + if(currentPositionIsAfter(key)){ + //our position is after the requested key, so can't do anything + return false; + } + return positionAt(key); + } + + @Override + public CellScannerPosition seekForwardToOrBefore(Cell key) { + //Do we even need this check or should upper layers avoid this situation. It's relatively + //expensive compared to the rest of the seek operation. + if(currentPositionIsAfter(key)){ + //our position is after the requested key, so can't do anything + return CellScannerPosition.AFTER; + } + + return positionAtOrBefore(key); + } + + @Override + public CellScannerPosition seekForwardToOrAfter(Cell key) { + //Do we even need this check or should upper layers avoid this situation. It's relatively + //expensive compared to the rest of the seek operation. + if(currentPositionIsAfter(key)){ + //our position is after the requested key, so can't do anything + return CellScannerPosition.AFTER; + } + + return positionAtOrAfter(key); + } + + /** + * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false + */ + @Override + public void positionAfterLastCell() { + resetToBeforeFirstEntry(); + beforeFirst = false; + afterLast = true; + } + + + /***************** Object methods ***************************/ + + @Override + public boolean equals(Object obj) { + //trivial override to confirm intent (findbugs) + return super.equals(obj); + } + + + /****************** internal methods ************************/ + + protected boolean currentPositionIsAfter(Cell cell){ + return compareTo(cell) > 0; + } + + protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) { + int minIndex = 0; + int maxIndex = currentRowNode.getLastCellIndex(); + int diff; + while (true) { + int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow + diff = populateNonRowFieldsAndCompareTo(midIndex, key); + + if (diff == 0) {// found exact match + return CellScannerPosition.AT; + } else if (minIndex == maxIndex) {// even termination case + break; + } else if ((minIndex + 1) == maxIndex) {// odd termination case + diff = populateNonRowFieldsAndCompareTo(maxIndex, key); + if(diff > 0){ + diff = populateNonRowFieldsAndCompareTo(minIndex, key); + } + break; + } else if (diff < 0) {// keep going forward + minIndex = currentCellIndex; + } else {// went past it, back up + maxIndex = currentCellIndex; + } + } + + if (diff == 0) { + return CellScannerPosition.AT; + + } else if (diff < 0) {// we are before key + if (beforeOnMiss) { + return CellScannerPosition.BEFORE; + } + if (advance()) { + return CellScannerPosition.AFTER; + } + return CellScannerPosition.AFTER_LAST; + + } else {// we are after key + if (!beforeOnMiss) { + return CellScannerPosition.AFTER; + } + if (previous()) { + return CellScannerPosition.BEFORE; + } + return CellScannerPosition.BEFORE_FIRST; + } + } + + /** + * compare this.row to key.row but starting at the current rowLength + * @param key Cell being searched for + * @return true if row buffer contents match key.row + */ + protected boolean rowMatchesAfterCurrentPosition(Cell key) { + if (!currentRowNode.hasOccurrences()) { + return false; + } + int thatRowLength = key.getRowLength(); + if (rowLength != thatRowLength) { + return false; + } + return true; + } + + // TODO move part of this 
to Cell comparator? + /** + * Compare only the bytes within the window of the current token + * @param key + * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after + */ + protected int compareToCurrentToken(Cell key) { + int startIndex = rowLength - currentRowNode.getTokenLength(); + int endIndexExclusive = startIndex + currentRowNode.getTokenLength(); + for (int i = startIndex; i < endIndexExclusive; ++i) { + if (i >= key.getRowLength()) {// key was shorter, so it's first + return -1; + } + byte keyByte = CellUtil.getRowByte(key, i); + byte thisByte = rowBuffer[i]; + if (keyByte == thisByte) { + continue; + } + return UnsignedBytes.compare(keyByte, thisByte); + } + return 0; + } + + protected void followLastFansUntilExhausted(){ + while(currentRowNode.hasFan()){ + followLastFan(); + } + } + + + /****************** complete seek when token mismatch ******************/ + + /** + * @param searcherIsAfterInputKey <0: input key is before the searcher's position
+ * >0: input key is after the searcher's position + */ + protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) { + if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up + boolean foundPreviousRow = previousRow(true); + if(foundPreviousRow){ + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + }else{ + return CellScannerPosition.BEFORE_FIRST; + } + + }else{//searcher position is before the input key + if(currentRowNode.hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.BEFORE; + } + boolean foundNextRow = nextRow(); + if(foundNextRow){ + return CellScannerPosition.AFTER; + }else{ + return CellScannerPosition.AFTER_LAST; + } + } + } + + /** + * @param searcherIsAfterInputKey <0: input key is before the searcher's position
+ * >0: input key is after the searcher's position + */ + protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) { + if (searcherIsAfterInputKey < 0) {//searcher position is after the input key + if(currentRowNode.hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + } + boolean foundNextRow = nextRow(); + if(foundNextRow){ + return CellScannerPosition.AFTER; + }else{ + return CellScannerPosition.AFTER_LAST; + } + + }else{//searcher position is before the input key, so go forward + discardCurrentRowNode(true); + boolean foundNextRow = nextRow(); + if(foundNextRow){ + return CellScannerPosition.AFTER; + }else{ + return CellScannerPosition.AFTER_LAST; + } + } + } + + + /****************** complete seek when fan mismatch ******************/ + + protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){ + if(fanInsertionPoint == 0){//we need to back up a row + boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row + if(foundPreviousRow){ + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + } + return CellScannerPosition.BEFORE_FIRST; + } + + //follow the previous fan, but then descend recursively forward + followFan(fanInsertionPoint - 1); + followLastFansUntilExhausted(); + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + } + + protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){ + if(fanInsertionPoint >= currentRowNode.getFanOut()){ + discardCurrentRowNode(true); + if (!nextRow()) { + return CellScannerPosition.AFTER_LAST; + } else { + return CellScannerPosition.AFTER; + } + } + + followFan(fanInsertionPoint); + if(hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + } + + if(nextRowInternal()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + + }else{ + return CellScannerPosition.AFTER_LAST; + } + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java new file mode 100644 index 0000000..b4ce25f --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; + +/** + * As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields + * of this class so that Cell logic can be applied, but without allocating new memory for every Cell + * iterated through. + */ +@InterfaceAudience.Private +public class PrefixTreeCell implements Cell, Comparable { + + /********************** static **********************/ + + public static final KeyValue.Type[] TYPES = new KeyValue.Type[256]; + static { + for (KeyValue.Type type : KeyValue.Type.values()) { + TYPES[type.getCode() & 0xff] = type; + } + } + + //Same as KeyValue constructor. Only used to avoid NPE's when full cell hasn't been initialized. + public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put; + + /******************** fields ************************/ + + protected byte[] block; + //we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler + protected boolean includeMvccVersion; + + protected byte[] rowBuffer; + protected int rowLength; + + protected byte[] familyBuffer; + protected int familyOffset; + protected int familyLength; + + protected byte[] qualifierBuffer;// aligned to the end of the array + protected int qualifierOffset; + protected int qualifierLength; + + protected Long timestamp; + protected Long mvccVersion; + + protected KeyValue.Type type; + + protected int absoluteValueOffset; + protected int valueLength; + + + /********************** Cell methods ******************/ + + /** + * For debugging. Currently creates new KeyValue to utilize its toString() method. 
+ */ + @Override + public String toString() { + return getKeyValueString(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof Cell)) { + return false; + } + //Temporary hack to maintain backwards compatibility with KeyValue.equals + return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj); + + //TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907 + } + + @Override + public int hashCode(){ + //Temporary hack to maintain backwards compatibility with KeyValue.hashCode + //I don't think this is used in any hot code paths + return KeyValueUtil.copyToNewKeyValue(this).hashCode(); + + //TODO return CellComparator.hashCode(this);//see HBASE-6907 + } + + @Override + public int compareTo(Cell other) { + return CellComparator.compareStatic(this, other); + } + + @Override + public long getTimestamp() { + return timestamp; + } + + @Override + public long getMvccVersion() { + if (!includeMvccVersion) { + return 0L; + } + return mvccVersion; + } + + @Override + public int getValueLength() { + return valueLength; + } + + @Override + public byte[] getRowArray() { + return rowBuffer; + } + + @Override + public int getRowOffset() { + return 0; + } + + @Override + public short getRowLength() { + return (short) rowLength; + } + + @Override + public byte[] getFamilyArray() { + return familyBuffer; + } + + @Override + public int getFamilyOffset() { + return familyOffset; + } + + @Override + public byte getFamilyLength() { + return (byte) familyLength; + } + + @Override + public byte[] getQualifierArray() { + return qualifierBuffer; + } + + @Override + public int getQualifierOffset() { + return qualifierOffset; + } + + @Override + public int getQualifierLength() { + return qualifierLength; + } + + @Override + public byte[] getValueArray() { + return block; + } + + @Override + public int getValueOffset() { + return absoluteValueOffset; + } + + @Override + public byte getTypeByte() { + return type.getCode(); + } + + + /************************* helper methods *************************/ + + /** + * Need this separate method so we can call it from subclasses' toString() methods + */ + protected String getKeyValueString(){ + KeyValue kv = KeyValueUtil.copyToNewKeyValue(this); + return kv.toString(); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java new file mode 100644 index 0000000..e9cf05d --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode.column; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.util.vint.UFIntTool; +import org.apache.hadoop.hbase.util.vint.UVIntTool; + +@InterfaceAudience.Private +public class ColumnNodeReader { + + /**************** fields ************************/ + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + protected byte[] columnBuffer; + protected boolean familyVsQualifier; + + protected int offsetIntoBlock; + + protected int tokenOffsetIntoBlock; + protected int tokenLength; + protected int parentStartPosition; + + + /************** construct *************************/ + + public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) { + this.columnBuffer = columnBuffer; + this.familyVsQualifier = familyVsQualifier; + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.blockMeta = blockMeta; + this.block = block; + } + + + /************* methods *****************************/ + + public void positionAt(int offsetIntoBlock) { + this.offsetIntoBlock = offsetIntoBlock; + tokenLength = UVIntTool.getInt(block, offsetIntoBlock); + tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength); + int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength; + int offsetWidth; + if (familyVsQualifier) { + offsetWidth = blockMeta.getFamilyOffsetWidth(); + } else { + offsetWidth = blockMeta.getQualifierOffsetWidth(); + } + parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth); + } + + public void prependTokenToBuffer(int bufferStartIndex) { + System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength); + } + + public boolean isRoot() { + if (familyVsQualifier) { + return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset(); + } else { + return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset(); + } + } + + + /************** standard methods *********************/ + + @Override + public String toString() { + return super.toString() + "[" + offsetIntoBlock + "]"; + } + + + /****************** get/set ****************************/ + + public int getTokenLength() { + return tokenLength; + } + + public int getParentStartPosition() { + return parentStartPosition; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java new file mode 100644 index 0000000..2b04a4b --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode.column; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; + +/** + * Position one of these appropriately in the data block and you can call its methods to retrieve + * the family or qualifier at the current position. + */ +@InterfaceAudience.Private +public class ColumnReader { + + /****************** fields *************************/ + + protected PrefixTreeBlockMeta blockMeta; + + protected byte[] columnBuffer; + protected int columnOffset; + protected int columnLength; + protected boolean familyVsQualifier; + + protected ColumnNodeReader columnNodeReader; + + + /******************** construct *******************/ + + public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) { + this.columnBuffer = columnBuffer; + this.familyVsQualifier = familyVsQualifier; + this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier); + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.blockMeta = blockMeta; + clearColumnBuffer(); + columnNodeReader.initOnBlock(blockMeta, block); + } + + + /********************* methods *******************/ + + public ColumnReader populateBuffer(int offsetIntoColumnData) { + clearColumnBuffer(); + int nextRelativeOffset = offsetIntoColumnData; + while (true) { + int absoluteOffset; + if (familyVsQualifier) { + absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset; + } else { + absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset; + } + columnNodeReader.positionAt(absoluteOffset); + columnOffset -= columnNodeReader.getTokenLength(); + columnLength += columnNodeReader.getTokenLength(); + columnNodeReader.prependTokenToBuffer(columnOffset); + if (columnNodeReader.isRoot()) { + return this; + } + nextRelativeOffset = columnNodeReader.getParentStartPosition(); + } + } + + public byte[] copyBufferToNewArray() {// for testing + byte[] out = new byte[columnLength]; + System.arraycopy(columnBuffer, columnOffset, out, 0, out.length); + return out; + } + + public int getColumnLength() { + return columnLength; + } + + public void clearColumnBuffer() { + columnOffset = columnBuffer.length; + columnLength = 0; + } + + + /****************************** get/set *************************************/ + + public int getColumnOffset() { + return columnOffset; + } + +} + diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java new file mode 100644 index 0000000..ffe1e1a --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode.row; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.vint.UFIntTool; +import org.apache.hadoop.hbase.util.vint.UVIntTool; + +/** + * Position one of these appropriately in the data block and you can call its methods to retrieve + * information necessary to decode the cells in the row. + */ +@InterfaceAudience.Private +public class RowNodeReader { + + /************* fields ***********************************/ + + protected byte[] block; + protected int offset; + protected int fanIndex; + + protected int numCells; + + protected int tokenOffset; + protected int tokenLength; + protected int fanOffset; + protected int fanOut; + + protected int familyOffsetsOffset; + protected int qualifierOffsetsOffset; + protected int timestampIndexesOffset; + protected int mvccVersionIndexesOffset; + protected int operationTypesOffset; + protected int valueOffsetsOffset; + protected int valueLengthsOffset; + protected int nextNodeOffsetsOffset; + + + /******************* construct **************************/ + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) { + this.block = block; + + this.offset = offset; + resetFanIndex(); + + this.tokenLength = UVIntTool.getInt(block, offset); + this.tokenOffset = offset + UVIntTool.numBytes(tokenLength); + + this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength); + this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut); + + this.numCells = UVIntTool.getInt(block, fanOffset + fanOut); + + this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells); + this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth(); + this.timestampIndexesOffset = qualifierOffsetsOffset + numCells + * blockMeta.getQualifierOffsetWidth(); + this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells + * blockMeta.getTimestampIndexWidth(); + this.operationTypesOffset = mvccVersionIndexesOffset + numCells + * blockMeta.getMvccVersionIndexWidth(); + this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth(); + this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth(); + this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth(); + } + + + /******************** methods ****************************/ + + public boolean isLeaf() { + return fanOut == 0; + } + + public boolean isNub() { + return fanOut > 0 && numCells > 0; + } + + public boolean isBranch() { + return fanOut > 0 && numCells == 0; + } + + public boolean hasOccurrences() { + return numCells > 0; + } + + public int getTokenArrayOffset(){ + return tokenOffset; + } + + public int getTokenLength() { + return tokenLength; + } + + public byte getFanByte(int i) { + return block[fanOffset + i]; + } + + /** + * for debugging + */ + protected String 
getFanByteReadable(int i){ + return Bytes.toStringBinary(block, fanOffset + i, 1); + } + + public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getFamilyOffsetWidth(); + int startIndex = familyOffsetsOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getQualifierOffsetWidth(); + int startIndex = qualifierOffsetsOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getTimestampIndexWidth(); + int startIndex = timestampIndexesOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getMvccVersionIndexWidth(); + int startIndex = mvccVersionIndexesOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getType(int index, PrefixTreeBlockMeta blockMeta) { + if (blockMeta.isAllSameType()) { + return blockMeta.getAllTypes(); + } + return block[operationTypesOffset + index]; + } + + public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getValueOffsetWidth(); + int startIndex = valueOffsetsOffset + fIntWidth * index; + int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + return offset; + } + + public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getValueLengthWidth(); + int startIndex = valueLengthsOffset + fIntWidth * index; + int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + return length; + } + + public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getNextNodeOffsetWidth(); + int startIndex = nextNodeOffsetsOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public String getBranchNubLeafIndicator() { + if (isNub()) { + return "N"; + } + return isBranch() ? "B" : "L"; + } + + public boolean hasChildren() { + return fanOut > 0; + } + + public int getLastFanIndex() { + return fanOut - 1; + } + + public int getLastCellIndex() { + return numCells - 1; + } + + public int getNumCells() { + return numCells; + } + + public int getFanOut() { + return fanOut; + } + + public byte[] getToken() { + // TODO pass in reusable ByteRange + return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray(); + } + + public int getOffset() { + return offset; + } + + public int whichFanNode(byte searchForByte) { + if( ! 
hasFan()){ + throw new IllegalStateException("This row node has no fan, so can't search it"); + } + int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut, + searchForByte); + if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block + return fanIndexInBlock - fanOffset; + } + return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse + } + + public void resetFanIndex() { + fanIndex = -1;// just the way the logic currently works + } + + public int getFanIndex() { + return fanIndex; + } + + public void setFanIndex(int fanIndex) { + this.fanIndex = fanIndex; + } + + public boolean hasFan(){ + return fanOut > 0; + } + + public boolean hasPreviousFanNodes() { + return fanOut > 0 && fanIndex > 0; + } + + public boolean hasMoreFanNodes() { + return fanIndex < getLastFanIndex(); + } + + public boolean isOnLastFanNode() { + return !hasMoreFanNodes(); + } + + + /*************** standard methods **************************/ + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut)); + sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength)); + sb.append(",numCells:" + numCells); + sb.append(",fanIndex:"+fanIndex); + if(fanIndex>=0){ + sb.append("("+getFanByteReadable(fanIndex)+")"); + } + return sb.toString(); + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java new file mode 100644 index 0000000..4a53510 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.util.vint.UFIntTool; + +/** + * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block. 
+ */ +@InterfaceAudience.Private +public class MvccVersionDecoder { + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + + /************** construct ***********************/ + + public MvccVersionDecoder() { + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.block = block; + this.blockMeta = blockMeta; + } + + + /************** methods *************************/ + + public long getMvccVersion(int index) { + if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical + return blockMeta.getMinMvccVersion(); + } + int startIndex = blockMeta.getAbsoluteMvccVersionOffset() + + blockMeta.getMvccVersionDeltaWidth() * index; + long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth()); + return blockMeta.getMinMvccVersion() + delta; + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java new file mode 100644 index 0000000..cb7f412 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.util.vint.UFIntTool; + +/** + * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block. 
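+ *<br/>
+ * As a rough sketch of the decoding (values are hypothetical, for illustration only): each
+ * timestamp is stored as a fixed-width unsigned delta from the block's minimum timestamp, so the
+ * value at a given index is recovered roughly like this:<br/>
+ * <pre>
+ *   long min   = blockMeta.getMinTimestamp();          // e.g. 1000
+ *   int width  = blockMeta.getTimestampDeltaWidth();   // e.g. 2 bytes per delta
+ *   int start  = blockMeta.getAbsoluteTimestampOffset() + width * index;
+ *   long delta = UFIntTool.fromBytes(block, start, width);
+ *   long ts    = min + delta;                          // e.g. 1000 + 7 = 1007
+ * </pre>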
+ */ +@InterfaceAudience.Private +public class TimestampDecoder { + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + + /************** construct ***********************/ + + public TimestampDecoder() { + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.block = block; + this.blockMeta = blockMeta; + } + + + /************** methods *************************/ + + public long getLong(int index) { + if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical + return blockMeta.getMinTimestamp(); + } + int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth() + * index; + long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth()); + return blockMeta.getMinTimestamp() + delta; + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderFactory.java new file mode 100644 index 0000000..ba5340d --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderFactory.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode; + +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the + * ones retrieved from the pool for usage. 
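+ *<br/>
+ * A minimal check-out/check-in cycle, sketched for illustration (error handling omitted):<br/>
+ * <pre>
+ *   PrefixTreeEncoder encoder = EncoderFactory.checkOut(outputStream, includeMvccVersion);
+ *   // ... feed cells to the encoder and flush it ...
+ *   EncoderFactory.checkIn(encoder);   // return it to the pool so it can be reset and reused
+ * </pre>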
+ */ +@InterfaceAudience.Private +public class EncoderFactory { + + private static final EncoderPool POOL = new ThreadLocalEncoderPool(); + + + public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) { + return POOL.checkOut(outputStream, includeMvccVersion); + } + + public static void checkIn(PrefixTreeEncoder encoder) { + POOL.checkIn(encoder); + } + + + /**************************** helper ******************************/ + + protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder, + OutputStream outputStream, boolean includeMvccVersion) { + PrefixTreeEncoder ret = encoder; + if (encoder == null) { + ret = new PrefixTreeEncoder(outputStream, includeMvccVersion); + } + ret.reset(outputStream, includeMvccVersion); + return ret; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderPool.java new file mode 100644 index 0000000..3b9df91 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/EncoderPool.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode; + +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + + +@InterfaceAudience.Private +public interface EncoderPool { + + PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion); + void checkIn(PrefixTreeEncoder encoder); + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java new file mode 100644 index 0000000..7817c38 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hadoop.hbase.io.CellOutputStream; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.byterange.ByteRangeSet; +import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet; +import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; +import org.apache.hadoop.hbase.util.vint.UFIntTool; +import org.apache.hadoop.io.WritableUtils; + +/** + * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are + * added they are completely copied into the various encoding structures. This is important because + * usually the cells being fed in during compactions will be transient.
+ *
+ * Usage:
+ * 1) constructor
+ * 2) append cells in sorted order: write(Cell cell)<br/>
+ * 3) flush()<br/>
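+ *<br/>
+ * Sketched below for illustration; the encoder instance is assumed to have been obtained from
+ * EncoderFactory, and sortedCells is a placeholder for cells already in sorted order:<br/>
+ * <pre>
+ *   for (Cell cell : sortedCells) {
+ *     encoder.write(cell);
+ *   }
+ *   encoder.flush();   // compiles the accumulated structures and writes the encoded bytes
+ * </pre>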
+ */ +@InterfaceAudience.Private +public class PrefixTreeEncoder implements CellOutputStream { + + /**************** static ************************/ + + protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class); + + //future-proof where HBase supports multiple families in a data block. + public static final boolean MULITPLE_FAMILIES_POSSIBLE = false; + + private static final boolean USE_HASH_COLUMN_SORTER = true; + private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256; + private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024; + + + /**************** fields *************************/ + + protected long numResets = 0L; + + protected OutputStream outputStream; + + /* + * Cannot change during a single block's encoding. If false, then substitute incoming Cell's + * mvccVersion with zero and write out the block as usual. + */ + protected boolean includeMvccVersion; + + /* + * reusable ByteRanges used for communicating with the sorters/compilers + */ + protected ByteRange rowRange; + protected ByteRange familyRange; + protected ByteRange qualifierRange; + + /* + * incoming Cell fields are copied into these arrays + */ + protected long[] timestamps; + protected long[] mvccVersions; + protected byte[] typeBytes; + protected int[] valueOffsets; + protected byte[] values; + + protected PrefixTreeBlockMeta blockMeta; + + /* + * Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and + * compile before flushing. + */ + protected LongEncoder timestampEncoder; + protected LongEncoder mvccVersionEncoder; + protected CellTypeEncoder cellTypeEncoder; + + /* + * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them + * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by + * comparing only with the previous row key, families and qualifiers can arrive in unsorted order + * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them. + */ + protected ByteRangeSet familyDeduplicator; + protected ByteRangeSet qualifierDeduplicator; + + /* + * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory + * trie structure with nodes connected by memory pointers (not serializable yet). + */ + protected Tokenizer rowTokenizer; + protected Tokenizer familyTokenizer; + protected Tokenizer qualifierTokenizer; + + /* + * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write + * all information to an output stream of bytes that can be stored on disk. + */ + protected RowSectionWriter rowWriter; + protected ColumnSectionWriter familyWriter; + protected ColumnSectionWriter qualifierWriter; + + /* + * Integers used for counting cells and bytes. We keep track of the size of the Cells as if they + * were full KeyValues because some parts of HBase like to know the "unencoded size". 
+ */ + protected int totalCells = 0; + protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues + protected int totalValueBytes = 0; + protected int maxValueLength = 0; + protected int totalBytes = 0;// + + + /***************** construct ***********************/ + + public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) { + // used during cell accumulation + this.blockMeta = new PrefixTreeBlockMeta(); + this.rowRange = new ByteRange(); + this.familyRange = new ByteRange(); + this.qualifierRange = new ByteRange(); + this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES]; + this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES]; + this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES]; + this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES]; + this.values = new byte[VALUE_BUFFER_INIT_SIZE]; + + // used during compilation + this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() + : new ByteRangeTreeSet(); + this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() + : new ByteRangeTreeSet(); + this.timestampEncoder = new LongEncoder(); + this.mvccVersionEncoder = new LongEncoder(); + this.cellTypeEncoder = new CellTypeEncoder(); + this.rowTokenizer = new Tokenizer(); + this.familyTokenizer = new Tokenizer(); + this.qualifierTokenizer = new Tokenizer(); + this.rowWriter = new RowSectionWriter(); + this.familyWriter = new ColumnSectionWriter(); + this.qualifierWriter = new ColumnSectionWriter(); + + reset(outputStream, includeMvccVersion); + } + + public void reset(OutputStream outputStream, boolean includeMvccVersion) { + ++numResets; + this.includeMvccVersion = includeMvccVersion; + this.outputStream = outputStream; + valueOffsets[0] = 0; + + familyDeduplicator.reset(); + qualifierDeduplicator.reset(); + rowTokenizer.reset(); + timestampEncoder.reset(); + mvccVersionEncoder.reset(); + cellTypeEncoder.reset(); + familyTokenizer.reset(); + qualifierTokenizer.reset(); + rowWriter.reset(); + familyWriter.reset(); + qualifierWriter.reset(); + + totalCells = 0; + totalUnencodedBytes = 0; + totalValueBytes = 0; + maxValueLength = 0; + totalBytes = 0; + } + + /** + * Check that the arrays used to hold cell fragments are large enough for the cell that is being + * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the + * first few block encodings but should stabilize quickly. + */ + protected void ensurePerCellCapacities() { + int currentCapacity = valueOffsets.length; + int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe + if (neededCapacity < currentCapacity) { + return; + } + + int padding = neededCapacity;//this will double the array size + timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding); + mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding); + typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding); + valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding); + } + + /******************** CellOutputStream methods *************************/ + + /** + * Note: Unused until support is added to the scanner/heap + *

+ * The following methods are optimized versions of write(Cell cell). The result should be + * identical; however, the implementation may be able to execute them much more efficiently because + * it does not need to compare the unchanged fields with the previous cell's. + *

+ * Consider the benefits during compaction when paired with a CellScanner that is also aware of + * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells + * to the write(Cell cell) method. + *
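The patch marks this path unused until the scanner/heap can take advantage of it, but a row-aware caller might look roughly like the sketch below; sameRowAsPrevious(...) is a hypothetical row-equality check, and encoder/sortedCells are assumed to already exist:

    Cell previous = null;
    for (Cell cell : sortedCells) {
      if (previous != null && sameRowAsPrevious(cell, previous)) {
        encoder.writeWithRepeatRow(cell);   // skip the row comparison, bump the row's cell counter
      } else {
        encoder.write(cell);
      }
      previous = cell;
    }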

+ * The savings of skipping duplicate row detection are significant with long row keys. A + * DataBlockEncoder may store a row key once in combination with a count of how many cells are in + * the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment + * of the counter, and that is for every cell in the row. + */ + + /** + * Add a Cell to the output stream but repeat the previous row. + */ + //@Override + public void writeWithRepeatRow(Cell cell) { + ensurePerCellCapacities();//can we optimize away some of this? + + //save a relatively expensive row comparison, incrementing the row's counter instead + rowTokenizer.incrementNumOccurrencesOfLatestValue(); + addFamilyPart(cell); + addQualifierPart(cell); + addAfterRowFamilyQualifier(cell); + } + + + @Override + public void write(Cell cell) { + ensurePerCellCapacities(); + + rowTokenizer.addSorted(CellUtil.fillRowRange(cell, rowRange)); + addFamilyPart(cell); + addQualifierPart(cell); + addAfterRowFamilyQualifier(cell); + } + + + /***************** internal add methods ************************/ + + private void addAfterRowFamilyQualifier(Cell cell){ + // timestamps + timestamps[totalCells] = cell.getTimestamp(); + timestampEncoder.add(cell.getTimestamp()); + + // memstore timestamps + if (includeMvccVersion) { + mvccVersions[totalCells] = cell.getMvccVersion(); + mvccVersionEncoder.add(cell.getMvccVersion()); + totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion()); + }else{ + //must overwrite in case there was a previous version in this array slot + mvccVersions[totalCells] = 0L; + if(totalCells == 0){//only need to do this for the first cell added + mvccVersionEncoder.add(0L); + } + //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled + } + + // types + typeBytes[totalCells] = cell.getTypeByte(); + cellTypeEncoder.add(cell.getTypeByte()); + + // values + totalValueBytes += cell.getValueLength(); + // double the array each time we run out of space + values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes); + CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]); + if (cell.getValueLength() > maxValueLength) { + maxValueLength = cell.getValueLength(); + } + valueOffsets[totalCells + 1] = totalValueBytes; + + // general + totalUnencodedBytes += KeyValueUtil.length(cell); + ++totalCells; + } + + private void addFamilyPart(Cell cell) { + if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) { + CellUtil.fillFamilyRange(cell, familyRange); + familyDeduplicator.add(familyRange); + } + } + + private void addQualifierPart(Cell cell) { + CellUtil.fillQualifierRange(cell, qualifierRange); + qualifierDeduplicator.add(qualifierRange); + } + + + /****************** compiling/flushing ********************/ + + /** + * Expensive method. The second half of the encoding work happens here. + * + * Take all the separate accumulated data structures and turn them into a single stream of bytes + * which is written to the outputStream. + */ + @Override + public void flush() throws IOException { + compile(); + + // do the actual flushing to the output stream. Order matters. + blockMeta.writeVariableBytesToOutputStream(outputStream); + rowWriter.writeBytes(outputStream); + familyWriter.writeBytes(outputStream); + qualifierWriter.writeBytes(outputStream); + timestampEncoder.writeBytes(outputStream); + mvccVersionEncoder.writeBytes(outputStream); + //CellType bytes are in the row nodes. 
there is no additional type section + outputStream.write(values, 0, totalValueBytes); + } + + /** + * Now that all the cells have been added, do the work to reduce them to a series of byte[] + * fragments that are ready to be written to the output stream. + */ + protected void compile(){ + blockMeta.setNumKeyValueBytes(totalUnencodedBytes); + int lastValueOffset = valueOffsets[totalCells]; + blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset)); + blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength)); + blockMeta.setNumValueBytes(totalValueBytes); + totalBytes += totalValueBytes; + + //these compile methods will add to totalBytes + compileTypes(); + compileMvccVersions(); + compileTimestamps(); + compileQualifiers(); + compileFamilies(); + compileRows(); + + int numMetaBytes = blockMeta.calculateNumMetaBytes(); + blockMeta.setNumMetaBytes(numMetaBytes); + totalBytes += numMetaBytes; + } + + /** + * The following "compile" methods do any intermediate work necessary to transform the cell + * fragments collected during the writing phase into structures that are ready to write to the + * outputStream. + *

+ * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion. + */ + + protected void compileTypes() { + blockMeta.setAllSameType(cellTypeEncoder.areAllSameType()); + if(cellTypeEncoder.areAllSameType()){ + blockMeta.setAllTypes(cellTypeEncoder.getOnlyType()); + } + } + + protected void compileMvccVersions() { + mvccVersionEncoder.compile(); + blockMeta.setMvccVersionFields(mvccVersionEncoder); + int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength(); + totalBytes += numMvccVersionBytes; + } + + protected void compileTimestamps() { + timestampEncoder.compile(); + blockMeta.setTimestampFields(timestampEncoder); + int numTimestampBytes = timestampEncoder.getOutputArrayLength(); + totalBytes += numTimestampBytes; + } + + protected void compileQualifiers() { + blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size()); + qualifierDeduplicator.compile(); + qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges()); + qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false); + qualifierWriter.compile(); + int numQualifierBytes = qualifierWriter.getNumBytes(); + blockMeta.setNumQualifierBytes(numQualifierBytes); + totalBytes += numQualifierBytes; + } + + protected void compileFamilies() { + blockMeta.setNumUniqueFamilies(familyDeduplicator.size()); + familyDeduplicator.compile(); + familyTokenizer.addAll(familyDeduplicator.getSortedRanges()); + familyWriter.reconstruct(blockMeta, familyTokenizer, true); + familyWriter.compile(); + int numFamilyBytes = familyWriter.getNumBytes(); + blockMeta.setNumFamilyBytes(numFamilyBytes); + totalBytes += numFamilyBytes; + } + + protected void compileRows() { + rowWriter.reconstruct(this); + rowWriter.compile(); + int numRowBytes = rowWriter.getNumBytes(); + blockMeta.setNumRowBytes(numRowBytes); + blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth()); + totalBytes += numRowBytes; + } + + /********************* convenience getters ********************************/ + + public long getValueOffset(int index) { + return valueOffsets[index]; + } + + public int getValueLength(int index) { + return (int) (valueOffsets[index + 1] - valueOffsets[index]); + } + + /************************* get/set *************************************/ + + public PrefixTreeBlockMeta getBlockMeta() { + return blockMeta; + } + + public Tokenizer getRowTokenizer() { + return rowTokenizer; + } + + public LongEncoder getTimestampEncoder() { + return timestampEncoder; + } + + public int getTotalBytes() { + return totalBytes; + } + + public long[] getTimestamps() { + return timestamps; + } + + public long[] getMvccVersions() { + return mvccVersions; + } + + public byte[] getTypeBytes() { + return typeBytes; + } + + public LongEncoder getMvccVersionEncoder() { + return mvccVersionEncoder; + } + + public ByteRangeSet getFamilySorter() { + return familyDeduplicator; + } + + public ByteRangeSet getQualifierSorter() { + return qualifierDeduplicator; + } + + public ColumnSectionWriter getFamilyWriter() { + return familyWriter; + } + + public ColumnSectionWriter getQualifierWriter() { + return qualifierWriter; + } + + public RowSectionWriter getRowWriter() { + return rowWriter; + } + + public ByteRange getValueByteRange() { + return new ByteRange(values, 0, totalValueBytes); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java new file mode 100644 index 
0000000..6cbe0c2 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode; + +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and + * would be more garbage than the data in the block. A new encoder is needed for each block in + * a flush, compaction, RPC response, etc. + * + * It is not a pool in the traditional sense, but implements the semantics of a traditional pool + * via ThreadLocals to avoid sharing between threads. Sharing between threads would not be + * very expensive given that it's accessed per-block, but this is just as easy. + * + * This pool implementation assumes there is a one-to-one mapping between a single thread and a + * single flush or compaction. + */ +@InterfaceAudience.Private +public class ThreadLocalEncoderPool implements EncoderPool{ + + private static final ThreadLocal ENCODER + = new ThreadLocal(); + + /** + * Get the encoder attached to the current ThreadLocal, or create a new one and attach it to the + * current thread. + */ + @Override + public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) { + PrefixTreeEncoder builder = ENCODER.get(); + builder = EncoderFactory.prepareEncoder(builder, os, includeMvccVersion); + ENCODER.set(builder); + return builder; + } + + @Override + public void checkIn(PrefixTreeEncoder encoder) { + // attached to thread on checkOut, so shouldn't need to do anything here + + // do we need to worry about detaching encoders from compaction threads or are the same threads + // used over and over + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java new file mode 100644 index 0000000..0105f8f --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.column; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Strings; +import org.apache.hadoop.hbase.util.vint.UFIntTool; +import org.apache.hadoop.hbase.util.vint.UVIntTool; + +/** + * Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly. + * The family and qualifier sections of the data block are made of 1 or more of these nodes. + *

+ * Each node is composed of 3 sections:
+ *

  • tokenLength: UVInt (normally 1 byte) indicating the number of token bytes + *
  • token[]: the actual token bytes + *
  • parentStartPosition: the offset of the next node from the start of the family or qualifier + * section + */ +@InterfaceAudience.Private +public class ColumnNodeWriter{ + + /************* fields ****************************/ + + protected TokenizerNode builderNode; + protected PrefixTreeBlockMeta blockMeta; + + protected boolean familyVsQualifier; + + protected int tokenLength; + protected byte[] token; + protected int parentStartPosition; + + + /*************** construct **************************/ + + public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode, + boolean familyVsQualifier) { + this.blockMeta = blockMeta; + this.builderNode = builderNode; + this.familyVsQualifier = familyVsQualifier; + calculateTokenLength(); + } + + + /************* methods *******************************/ + + public boolean isRoot() { + return parentStartPosition == 0; + } + + private void calculateTokenLength() { + tokenLength = builderNode.getTokenLength(); + token = new byte[tokenLength]; + } + + /** + * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a + * placeholder. + * @param offsetWidthPlaceholder the placeholder + * @return node width + */ + public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) { + int width = 0; + width += UVIntTool.numBytes(tokenLength); + width += token.length; + width += offsetWidthPlaceholder; + return width; + } + + public void writeBytes(OutputStream os) throws IOException { + int parentOffsetWidth; + if (familyVsQualifier) { + parentOffsetWidth = blockMeta.getFamilyOffsetWidth(); + } else { + parentOffsetWidth = blockMeta.getQualifierOffsetWidth(); + } + UVIntTool.writeBytes(tokenLength, os); + os.write(token); + UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os); + } + + public void setTokenBytes(ByteRange source) { + source.deepCopySubRangeTo(0, tokenLength, token, 0); + } + + + /****************** standard methods ************************/ + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ","); + sb.append("["); + sb.append(Bytes.toString(token)); + sb.append("]->"); + sb.append(parentStartPosition); + return sb.toString(); + } + + + /************************** get/set ***********************/ + + public void setParentStartPosition(int parentStartPosition) { + this.parentStartPosition = parentStartPosition; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java new file mode 100644 index 0000000..122ffb4 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.column; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.vint.UFIntTool; + +import com.google.common.collect.Lists; + +/** + * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family + * section is written after the row section, and qualifier section after family section. + *

    + * The family and qualifier tries, or "column tries", are structured differently than the row trie. + * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and + * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a + * row, the row trie contains a list of offsets into the column sections (along with pointers to + * timestamps and other per-cell fields). These offsets point to the last column node/token that + * comprises the column name. To assemble the column name, the trie is traversed in reverse (right + * to left), with the rightmost tokens pointing to the start of their "parent" node which is the + * node to the left. + *
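A schematic decoder-side sketch of that right-to-left reassembly; ColumnNode and nodeAt(offset) are hypothetical stand-ins for parsing the tokenLength/token/parentStartPosition layout, and the root node is assumed to sit at offset 0 of the section:

    static byte[] reassembleColumnName(int lastNodeOffset) {
      java.util.List<byte[]> tokens = new java.util.ArrayList<byte[]>();
      int offset = lastNodeOffset;
      while (true) {
        ColumnNode node = nodeAt(offset);      // hypothetical parsed view of one column node
        tokens.add(node.token());
        if (offset == 0) {                     // assumed: the section starts with the root node
          break;
        }
        offset = node.parentStartPosition();   // jump left toward the root
      }
      java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
      for (int i = tokens.size() - 1; i >= 0; --i) {  // collected leaf-to-root; emit root-to-leaf
        out.write(tokens.get(i), 0, tokens.get(i).length);
      }
      return out.toByteArray();
    }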

    + * This choice was made to reduce the size of the column trie by storing the minimum amount of + * offset data. As a result, to find a specific qualifier within a row, you must do a binary search + * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might + * encode the columns in both a forward and reverse trie, which would convert binary searches into + * more efficient trie searches which would be beneficial for wide rows. + */ +@InterfaceAudience.Private +public class ColumnSectionWriter { + + public static final int EXPECTED_NUBS_PLUS_LEAVES = 100; + + /****************** fields ****************************/ + + private PrefixTreeBlockMeta blockMeta; + + private boolean familyVsQualifier; + private Tokenizer tokenizer; + private int numBytes = 0; + private ArrayList nonLeaves; + private ArrayList leaves; + private ArrayList allNodes; + private ArrayList columnNodeWriters; + private List outputArrayOffsets; + + + /*********************** construct *********************/ + + public ColumnSectionWriter() { + this.nonLeaves = Lists.newArrayList(); + this.leaves = Lists.newArrayList(); + this.outputArrayOffsets = Lists.newArrayList(); + } + + public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder, + boolean familyVsQualifier) { + this();// init collections + reconstruct(blockMeta, builder, familyVsQualifier); + } + + public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder, + boolean familyVsQualifier) { + this.blockMeta = blockMeta; + this.tokenizer = builder; + this.familyVsQualifier = familyVsQualifier; + } + + public void reset() { + numBytes = 0; + nonLeaves.clear(); + leaves.clear(); + outputArrayOffsets.clear(); + } + + + /****************** methods *******************************/ + + public ColumnSectionWriter compile() { + if (familyVsQualifier) { + // do nothing. 
max family length fixed at Byte.MAX_VALUE + } else { + blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength()); + } + + tokenizer.setNodeFirstInsertionIndexes(); + + tokenizer.appendNodes(nonLeaves, true, false); + + tokenizer.appendNodes(leaves, false, true); + + allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size()); + allNodes.addAll(nonLeaves); + allNodes.addAll(leaves); + + columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes)); + for (int i = 0; i < allNodes.size(); ++i) { + TokenizerNode node = allNodes.get(i); + columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier)); + } + + // leaf widths are known at this point, so add them up + int totalBytesWithoutOffsets = 0; + for (int i = allNodes.size() - 1; i >= 0; --i) { + ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i); + // leaves store all but their first token byte + totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0); + } + + // figure out how wide our offset FInts are + int parentOffsetWidth = 0; + while (true) { + ++parentOffsetWidth; + int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size(); + if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) { + numBytes = numBytesFinder; + break; + }// it fits + } + if (familyVsQualifier) { + blockMeta.setFamilyOffsetWidth(parentOffsetWidth); + } else { + blockMeta.setQualifierOffsetWidth(parentOffsetWidth); + } + + int forwardIndex = 0; + for (int i = 0; i < allNodes.size(); ++i) { + TokenizerNode node = allNodes.get(i); + ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i); + int fullNodeWidth = columnNodeWriter + .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth); + node.setOutputArrayOffset(forwardIndex); + columnNodeWriter.setTokenBytes(node.getToken()); + if (node.isRoot()) { + columnNodeWriter.setParentStartPosition(0); + } else { + columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset()); + } + forwardIndex += fullNodeWidth; + } + + tokenizer.appendOutputArrayOffsets(outputArrayOffsets); + + return this; + } + + public void writeBytes(OutputStream os) throws IOException { + for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) { + columnNodeWriter.writeBytes(os); + } + } + + + /************* get/set **************************/ + + public ArrayList getColumnNodeWriters() { + return columnNodeWriters; + } + + public int getNumBytes() { + return numBytes; + } + + public int getOutputArrayOffset(int sortedIndex) { + return outputArrayOffsets.get(sortedIndex); + } + + public ArrayList getNonLeaves() { + return nonLeaves; + } + + public ArrayList getLeaves() { + return leaves; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java new file mode 100644 index 0000000..c8d6707 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.other; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each + * KV. If(allSameType) during conversion to byte[], then we can store the "onlyType" in blockMeta, + * therefore not repeating it for each cell and saving 1 byte per cell. + */ +@InterfaceAudience.Private +public class CellTypeEncoder { + + /************* fields *********************/ + + protected boolean pendingFirstType = true; + protected boolean allSameType = true; + protected byte onlyType; + + + /************* construct *********************/ + + public void reset() { + pendingFirstType = true; + allSameType = true; + } + + + /************* methods *************************/ + + public void add(byte type) { + if (pendingFirstType) { + onlyType = type; + pendingFirstType = false; + } else if (onlyType != type) { + allSameType = false; + } + } + + + /**************** get/set **************************/ + + public boolean areAllSameType() { + return allSameType; + } + + public byte getOnlyType() { + return onlyType; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java new file mode 100644 index 0000000..553d6cb --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/LongEncoder.java @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.other; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.HashSet; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.vint.UFIntTool; + +import com.google.common.base.Joiner; + +/** + * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a + * collection of Cells. + * + * 1. add longs to a HashSet for fast de-duplication + * 2. keep track of the min and max + * 3. copy all values to a new long[] + * 4. Collections.sort the long[] + * 5. calculate maxDelta = max - min + * 6. determine FInt width based on maxDelta + * 7. PrefixTreeEncoder binary searches to find index of each value + */ +@InterfaceAudience.Private +public class LongEncoder { + + /****************** fields ****************************/ + + protected HashSet uniqueValues; + protected long[] sortedUniqueValues; + protected long min, max, maxDelta; + + protected int bytesPerDelta; + protected int bytesPerIndex; + protected int totalCompressedBytes; + + + /****************** construct ****************************/ + + public LongEncoder() { + this.uniqueValues = new HashSet(); + } + + public void reset() { + uniqueValues.clear(); + sortedUniqueValues = null; + min = Long.MAX_VALUE; + max = Long.MIN_VALUE; + maxDelta = Long.MIN_VALUE; + bytesPerIndex = 0; + bytesPerDelta = 0; + totalCompressedBytes = 0; + } + + + /************* methods ***************************/ + + public void add(long timestamp) { + uniqueValues.add(timestamp); + } + + public LongEncoder compile() { + int numUnique = uniqueValues.size(); + if (numUnique == 1) { + min = CollectionUtils.getFirst(uniqueValues); + sortedUniqueValues = new long[] { min }; + return this; + } + + sortedUniqueValues = new long[numUnique]; + int lastIndex = -1; + for (long value : uniqueValues) { + sortedUniqueValues[++lastIndex] = value; + } + Arrays.sort(sortedUniqueValues); + min = ArrayUtils.getFirst(sortedUniqueValues); + max = ArrayUtils.getLast(sortedUniqueValues); + maxDelta = max - min; + if (maxDelta > 0) { + bytesPerDelta = UFIntTool.numBytes(maxDelta); + } else { + bytesPerDelta = 0; + } + + int maxIndex = numUnique - 1; + bytesPerIndex = UFIntTool.numBytes(maxIndex); + + totalCompressedBytes = numUnique * bytesPerDelta; + + return this; + } + + public long getDelta(int index) { + if (sortedUniqueValues.length == 0) { + return 0; + } + return sortedUniqueValues[index] - min; + } + + public int getIndex(long value) { + // should always find an exact match + return Arrays.binarySearch(sortedUniqueValues, value); + } + + public void writeBytes(OutputStream os) throws IOException { + for (int i = 0; i < sortedUniqueValues.length; ++i) { + long delta = sortedUniqueValues[i] - min; + UFIntTool.writeBytes(bytesPerDelta, delta, os); + } + } + + //convenience method for tests + public byte[] getByteArray() throws IOException{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + writeBytes(baos); + return baos.toByteArray(); + } + + public int getOutputArrayLength() { + return sortedUniqueValues.length * bytesPerDelta; + } + + public int getNumUniqueValues() { + return sortedUniqueValues.length; + } + + + /******************* Object methods **********************/ + + @Override + public String toString() { + if 
(ArrayUtils.isEmpty(sortedUniqueValues)) { + return "[]"; + } + return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]"; + } + + + /******************** get/set **************************/ + + public long getMin() { + return min; + } + + public int getBytesPerDelta() { + return bytesPerDelta; + } + + public int getBytesPerIndex() { + return bytesPerIndex; + } + + public int getTotalCompressedBytes() { + return totalCompressedBytes; + } + + public long[] getSortedUniqueTimestamps() { + return sortedUniqueValues; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java new file mode 100644 index 0000000..29ebafa --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.row; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.vint.UFIntTool; +import org.apache.hadoop.hbase.util.vint.UVIntTool; + +/** + * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf. + * Please see the write() method for the order in which data is written. 
+ */ +@InterfaceAudience.Private +public class RowNodeWriter{ + protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class); + + /********************* fields ******************************/ + + protected PrefixTreeEncoder prefixTreeEncoder; + protected PrefixTreeBlockMeta blockMeta; + protected TokenizerNode tokenizerNode; + + protected int tokenWidth; + protected int fanOut; + protected int numCells; + + protected int width; + + + /*********************** construct *************************/ + + public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) { + reconstruct(keyValueBuilder, tokenizerNode); + } + + public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) { + this.prefixTreeEncoder = prefixTreeEncoder; + reset(tokenizerNode); + } + + public void reset(TokenizerNode node) { + this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks + this.tokenizerNode = node; + this.tokenWidth = 0; + this.fanOut = 0; + this.numCells = 0; + this.width = 0; + calculateOffsetsAndLengths(); + } + + + /********************* methods ****************************/ + + protected void calculateOffsetsAndLengths(){ + tokenWidth = tokenizerNode.getTokenLength(); + if(!tokenizerNode.isRoot()){ + --tokenWidth;//root has no parent + } + fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren()); + numCells = tokenizerNode.getNumOccurrences(); + } + + public int calculateWidth(){ + calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth()); + return width; + } + + public int calculateWidthOverrideOffsetWidth(int offsetWidth){ + width = 0; + width += UVIntTool.numBytes(tokenWidth); + width += tokenWidth; + + width += UVIntTool.numBytes(fanOut); + width += fanOut; + + width += UVIntTool.numBytes(numCells); + + if(tokenizerNode.hasOccurrences()){ + int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth() + + blockMeta.getQualifierOffsetWidth() + + blockMeta.getTimestampIndexWidth() + + blockMeta.getMvccVersionIndexWidth() + + blockMeta.getKeyValueTypeWidth() + + blockMeta.getValueOffsetWidth() + + blockMeta.getValueLengthWidth(); + width += numCells * fixedBytesPerCell; + } + + if( ! tokenizerNode.isLeaf()){ + width += fanOut * offsetWidth; + } + + return width; + } + + + /*********************** writing the compiled structure to the OutputStream ***************/ + + public void write(OutputStream os) throws IOException{ + //info about this row trie node + writeRowToken(os); + writeFan(os); + writeNumCells(os); + + //UFInt indexes and offsets for each cell in the row (if nub or leaf) + writeFamilyNodeOffsets(os); + writeQualifierNodeOffsets(os); + writeTimestampIndexes(os); + writeMvccVersionIndexes(os); + writeCellTypes(os); + writeValueOffsets(os); + writeValueLengths(os); + + //offsets to the children of this row trie node (if branch or nub) + writeNextRowTrieNodeOffsets(os); + } + + + /** + * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3 + * fields can reproduce all the row keys that compose the block. + */ + + /** + * UVInt: tokenWidth + * bytes: token + */ + protected void writeRowToken(OutputStream os) throws IOException { + UVIntTool.writeBytes(tokenWidth, os); + int tokenStartIndex = tokenizerNode.isRoot() ? 
0 : 1; + ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex); + } + + /** + * UVInt: numFanBytes/fanOut + * bytes: each fan byte + */ + public void writeFan(OutputStream os) throws IOException { + UVIntTool.writeBytes(fanOut, os); + if (fanOut <= 0) { + return; + } + ArrayList children = tokenizerNode.getChildren(); + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + os.write(child.getToken().get(0));// first byte of each child's token + } + } + + /** + * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes + */ + protected void writeNumCells(OutputStream os) throws IOException { + UVIntTool.writeBytes(numCells, os); + } + + + /** + * The following methods write data for each cell in the row, mostly consisting of indexes or + * offsets into the timestamp/column data structures that are written in the middle of the block. + * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary + * search of a particular column/timestamp combination. + *
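A small illustration of why these per-cell fields use fixed-width UFInts rather than VInts: the k-th entry of a section can be addressed arithmetically, which is what keeps the binary search cheap. The sectionStart/cellIndex bookkeeping below is hypothetical; getQualifierOffsetWidth() is the width this class writes:

    int entryWidth = blockMeta.getQualifierOffsetWidth();
    int entryPosition = qualifierOffsetSectionStart + cellIndex * entryWidth;  // no sequential scan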

    + * Branch nodes will not have any data in these sections. + */ + + protected void writeFamilyNodeOffsets(OutputStream os) throws IOException { + if (blockMeta.getFamilyOffsetWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode + .getFirstInsertionIndex() + i : 0; + int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId( + cellInsertionIndex); + int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset( + sortedIndex); + UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os); + } + } + + protected void writeQualifierNodeOffsets(OutputStream os) throws IOException { + if (blockMeta.getQualifierOffsetWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId( + cellInsertionIndex); + int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset( + sortedIndex); + UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os); + } + } + + protected void writeTimestampIndexes(OutputStream os) throws IOException { + if (blockMeta.getTimestampIndexWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex]; + int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp); + UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os); + } + } + + protected void writeMvccVersionIndexes(OutputStream os) throws IOException { + if (blockMeta.getMvccVersionIndexWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex]; + int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion); + UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os); + } + } + + protected void writeCellTypes(OutputStream os) throws IOException { + if (blockMeta.isAllSameType()) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]); + } + } + + protected void writeValueOffsets(OutputStream os) throws IOException { + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex); + UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os); + } + } + + protected void writeValueLengths(OutputStream os) throws IOException { + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex); + UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os); + } + } + + + /** + * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes. 
+ */ + protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException { + ArrayList children = tokenizerNode.getChildren(); + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex(); + UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os); + } + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java new file mode 100644 index 0000000..f5d4eba --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowSectionWriter.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.row; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.util.vint.UFIntTool; + +import com.google.common.collect.Lists; + +/** + * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row + * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie + * also contains references to offsets in the other sections of the data block that enable the + * decoder to match a row key with its qualifier, timestamp, type, value, etc. + *

    + * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the + * internals of each row node. + */ +@InterfaceAudience.Private +public class RowSectionWriter { + + /***************** fields **************************/ + + protected PrefixTreeEncoder prefixTreeEncoder; + + protected PrefixTreeBlockMeta blockMeta; + + protected int numBytes; + + protected ArrayList nonLeaves; + protected ArrayList leaves; + + protected ArrayList leafWriters; + protected ArrayList nonLeafWriters; + + protected int numLeafWriters; + protected int numNonLeafWriters; + + + /********************* construct **********************/ + + public RowSectionWriter() { + this.nonLeaves = Lists.newArrayList(); + this.leaves = Lists.newArrayList(); + this.leafWriters = Lists.newArrayList(); + this.nonLeafWriters = Lists.newArrayList(); + } + + public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) { + reconstruct(prefixTreeEncoder); + } + + public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) { + this.prefixTreeEncoder = prefixTreeEncoder; + this.blockMeta = prefixTreeEncoder.getBlockMeta(); + reset(); + } + + public void reset() { + numBytes = 0; + nonLeaves.clear(); + leaves.clear(); + numLeafWriters = 0; + numNonLeafWriters = 0; + } + + + /****************** methods *******************************/ + + public RowSectionWriter compile() { + blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength()); + prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes(); + + prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false); + prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true); + + // track the starting position of each node in final output + int negativeIndex = 0; + + // create leaf writer nodes + // leaf widths are known at this point, so add them up + int totalLeafBytes = 0; + for (int i = leaves.size() - 1; i >= 0; --i) { + TokenizerNode leaf = leaves.get(i); + RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf); + ++numLeafWriters; + // leaves store all but their first token byte + int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0); + totalLeafBytes += leafNodeWidth; + negativeIndex += leafNodeWidth; + leaf.setNegativeIndex(negativeIndex); + } + + int totalNonLeafBytesWithoutOffsets = 0; + int totalChildPointers = 0; + for (int i = nonLeaves.size() - 1; i >= 0; --i) { + TokenizerNode nonLeaf = nonLeaves.get(i); + RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf); + ++numNonLeafWriters; + totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0); + totalChildPointers += nonLeaf.getNumChildren(); + } + + // figure out how wide our offset FInts are + int offsetWidth = 0; + while (true) { + ++offsetWidth; + int offsetBytes = totalChildPointers * offsetWidth; + int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes; + if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) { + // it fits + numBytes = totalRowBytes; + break; + } + } + blockMeta.setNextNodeOffsetWidth(offsetWidth); + + // populate negativeIndexes + for (int i = nonLeaves.size() - 1; i >= 0; --i) { + TokenizerNode nonLeaf = nonLeaves.get(i); + int writerIndex = nonLeaves.size() - i - 1; + RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex); + int nodeWidth = nonLeafWriter.calculateWidth(); + negativeIndex += nodeWidth; + nonLeaf.setNegativeIndex(negativeIndex); + } + + return this; + } 
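The loop above (and the analogous one in ColumnSectionWriter.compile()) searches for the smallest offset width that can still address the section the pointers themselves enlarge. A worked illustration with invented byte counts:

    int totalChildPointers = 400;           // invented
    int totalBytesWithoutOffsets = 30000;   // invented
    int offsetWidth = 0;
    int totalRowBytes;
    do {
      ++offsetWidth;
      totalRowBytes = totalBytesWithoutOffsets + totalChildPointers * offsetWidth;
    } while (totalRowBytes >= UFIntTool.maxValueForNumBytes(offsetWidth));
    // width 1 fails (30400 cannot be addressed by 1 byte); width 2 gives 30800 < 65535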
+ + protected RowNodeWriter initializeWriter(List list, int index, + TokenizerNode builderNode) { + RowNodeWriter rowNodeWriter = null; + //check if there is an existing node we can recycle + if (index >= list.size()) { + //there are not enough existing nodes, so add a new one which will be retrieved below + list.add(new RowNodeWriter(prefixTreeEncoder, builderNode)); + } + rowNodeWriter = list.get(index); + rowNodeWriter.reset(builderNode); + return rowNodeWriter; + } + + + public void writeBytes(OutputStream os) throws IOException { + for (int i = numNonLeafWriters - 1; i >= 0; --i) { + RowNodeWriter nonLeafWriter = nonLeafWriters.get(i); + nonLeafWriter.write(os); + } + // duplicates above... written more for clarity right now + for (int i = numLeafWriters - 1; i >= 0; --i) { + RowNodeWriter leafWriter = leafWriters.get(i); + leafWriter.write(os); + } + } + + + /***************** static ******************************/ + + protected static ArrayList filterByLeafAndReverse( + ArrayList ins, boolean leaves) { + ArrayList outs = Lists.newArrayList(); + for (int i = ins.size() - 1; i >= 0; --i) { + TokenizerNode n = ins.get(i); + if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) { + outs.add(ins.get(i)); + } + } + return outs; + } + + + /************* get/set **************************/ + + public int getNumBytes() { + return numBytes; + } + + public ArrayList getNonLeaves() { + return nonLeaves; + } + + public ArrayList getLeaves() { + return leaves; + } + + public ArrayList getNonLeafWriters() { + return nonLeafWriters; + } + + public ArrayList getLeafWriters() { + return leafWriters; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java new file mode 100644 index 0000000..e10db3a --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize; + +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Determines order of nodes in the output array. Maybe possible to optimize further. 
+ */ +@InterfaceAudience.Private +public class TokenDepthComparator implements Comparator { + + @Override + public int compare(TokenizerNode a, TokenizerNode b) { + if(a==null){ + throw new IllegalArgumentException("a cannot be null"); + } + if(b==null){ + throw new IllegalArgumentException("b cannot be null"); + } + + // put leaves at the end + if (!a.isLeaf() && b.isLeaf()) { + return -1; + } + if (a.isLeaf() && !b.isLeaf()) { + return 1; + } + + if (a.isLeaf() && b.isLeaf()) {// keep leaves in sorted order (for debugability) + return a.getId() < b.getId() ? -1 : 1; + } + + // compare depth + if (a.getTokenOffset() < b.getTokenOffset()) { + return -1; + } + if (a.getTokenOffset() > b.getTokenOffset()) { + return 1; + } + + // if same depth, return lower id first. ids are unique + return a.getId() < b.getId() ? -1 : 1; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java new file mode 100644 index 0000000..a21bd12 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionUtils; + +import com.google.common.collect.Lists; + +/** + * Data structure used in the first stage of PrefixTree encoding: + *

  • accepts a sorted stream of ByteRanges + *
  • splits them into a set of tokens, each held by a {@link TokenizerNode} + *
  • connects the TokenizerNodes via standard java references + *
  • keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content + *
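A small sketch of feeding sorted row keys into the Tokenizer, as described in the list above; the ByteRange(byte[]) constructor is assumed and the keys are invented:

    Tokenizer rowTokenizer = new Tokenizer();
    rowTokenizer.addSorted(new ByteRange(Bytes.toBytes("row1a")));   // keys must already be sorted
    rowTokenizer.addSorted(new ByteRange(Bytes.toBytes("row1b")));
    rowTokenizer.addSorted(new ByteRange(Bytes.toBytes("row22")));
    // conceptual result: root token "row" with children "1" -> {"a", "b"} and "22";
    // getTreeDepth() and getMaxElementLength() are later used to size reader structures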


    + * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier + * encoding. + */ +@InterfaceAudience.Private +public class Tokenizer{ + + /***************** fields **************************/ + + protected int numArraysAdded = 0; + protected long lastNodeId = -1; + protected ArrayList nodes; + protected int numNodes; + protected TokenizerNode root; + protected byte[] tokens; + protected int tokensLength; + + protected int maxElementLength = 0; + // number of levels in the tree assuming root level is 0 + protected int treeDepth = 0; + + + /******************* construct *******************/ + + public Tokenizer() { + this.nodes = Lists.newArrayList(); + this.tokens = new byte[0]; + } + + public void reset() { + numArraysAdded = 0; + lastNodeId = -1; + numNodes = 0; + tokensLength = 0; + root = null; + maxElementLength = 0; + treeDepth = 0; + } + + + /***************** building *************************/ + + public void addAll(ArrayList sortedByteRanges) { + for (int i = 0; i < sortedByteRanges.size(); ++i) { + ByteRange byteRange = sortedByteRanges.get(i); + addSorted(byteRange); + } + } + + public void addSorted(final ByteRange bytes) { + ++numArraysAdded; + if (bytes.getLength() > maxElementLength) { + maxElementLength = bytes.getLength(); + } + if (root == null) { + // nodeDepth of firstNode (non-root) is 1 + root = addNode(null, 1, 0, bytes, 0); + } else { + root.addSorted(bytes); + } + } + + public void incrementNumOccurrencesOfLatestValue(){ + CollectionUtils.getLast(nodes).incrementNumOccurrences(1); + } + + protected long nextNodeId() { + return ++lastNodeId; + } + + protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset, + final ByteRange token, int inputTokenOffset) { + int inputTokenLength = token.getLength() - inputTokenOffset; + int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset); + TokenizerNode node = null; + if (nodes.size() <= numNodes) { + node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset, + inputTokenLength); + nodes.add(node); + } else { + node = nodes.get(numNodes); + node.reset(); + node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength); + } + ++numNodes; + return node; + } + + protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) { + int newOffset = tokensLength; + int inputTokenLength = token.getLength() - inputTokenOffset; + int newMinimum = tokensLength + inputTokenLength; + tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum); + token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength); + tokensLength += inputTokenLength; + return newOffset; + } + + protected void submitMaxNodeDepthCandidate(int nodeDepth) { + if (nodeDepth > treeDepth) { + treeDepth = nodeDepth; + } + } + + + /********************* read ********************/ + + public int getNumAdded(){ + return numArraysAdded; + } + + // for debugging + public ArrayList getNodes(boolean includeNonLeaves, boolean includeLeaves) { + ArrayList nodes = Lists.newArrayList(); + root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves); + return nodes; + } + + public void appendNodes(List appendTo, boolean includeNonLeaves, + boolean includeLeaves) { + root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves); + } + + public List getArrays() { + List nodes = new ArrayList(); + root.appendNodesToExternalList(nodes, true, true); + List byteArrays = 
Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes)); + for (int i = 0; i < nodes.size(); ++i) { + TokenizerNode node = nodes.get(i); + for (int j = 0; j < node.getNumOccurrences(); ++j) { + byte[] byteArray = node.getNewByteArray(); + byteArrays.add(byteArray); + } + } + return byteArrays; + } + + //currently unused, but working and possibly useful in the future + public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, + int keyLength) { + root.getNode(resultHolder, key, keyOffset, keyLength); + } + + + /********************** write ***************************/ + + public Tokenizer setNodeFirstInsertionIndexes() { + root.setInsertionIndexes(0); + return this; + } + + public Tokenizer appendOutputArrayOffsets(List offsets) { + root.appendOutputArrayOffsets(offsets); + return this; + } + + + /********************* print/debug ********************/ + + protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false; + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getStructuralString()); + if (INCLUDE_FULL_TREE_IN_TO_STRING) { + for (byte[] bytes : getArrays()) { + if (sb.length() > 0) { + sb.append("\n"); + } + sb.append(Bytes.toString(bytes)); + } + } + return sb.toString(); + } + + public String getStructuralString() { + List nodes = getNodes(true, true); + StringBuilder sb = new StringBuilder(); + for (TokenizerNode node : nodes) { + String line = node.getPaddedTokenAndOccurrenceString(); + sb.append(line + "\n"); + } + return sb.toString(); + } + + + /****************** get/set ************************/ + + public TokenizerNode getRoot() { + return root; + } + + public int getMaxElementLength() { + return maxElementLength; + } + + public int getTreeDepth() { + return treeDepth; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java new file mode 100644 index 0000000..077b5f5 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java @@ -0,0 +1,632 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.Strings; + +import com.google.common.collect.Lists; + +/** + * Individual node in a Trie structure. 
Each node is one of 3 types: + *

  • Branch: an internal trie node that may have a token and must have multiple children, but does + * not represent an actual input byte[], hence its numOccurrences is 0 + *
  • Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the + * last bytes in the input byte[]s. + *
  • Nub: a combination of a branch and leaf. Its token represents the last bytes of input + * byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s + * that add bytes to this node's input byte[] (see the sketch below this list). + *

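For quick reference, the three node types follow directly from the two fields involved; the sketch below simply restates the isLeaf()/isBranch()/isNub() predicates defined later in this class (see also the worked example that follows).

```java
// Sketch of the node-type rules; the authoritative checks are isLeaf()/isBranch()/isNub() below.
boolean isLeaf   = children.isEmpty()  && numOccurrences >= 1; // ends at least one input byte[], no children
boolean isBranch = !children.isEmpty() && numOccurrences == 0; // purely structural, never an input itself
boolean isNub    = !children.isEmpty() && numOccurrences >= 1; // ends an input byte[] AND has children
```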
    + * Example inputs (numInputs=7): + * 0: AAA + * 1: AAA + * 2: AAB + * 3: AAB + * 4: AAB + * 5: AABQQ + * 6: AABQQ + *

    + * Resulting TokenizerNodes: + * AA <- branch, numOccurrences=0, tokenStartOffset=0, token.length=2 + * A <- leaf, numOccurrences=2, tokenStartOffset=2, token.length=1 + * B <- nub, numOccurrences=3, tokenStartOffset=2, token.length=1 + * QQ <- leaf, numOccurrences=2, tokenStartOffset=3, token.length=2 + *

    + * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2 + */ +@InterfaceAudience.Private +public class TokenizerNode{ + + /* + * Ref to data structure wrapper + */ + protected Tokenizer builder; + + /****************************************************************** + * Tree content/structure used during tokenization + * ****************************************************************/ + + /* + * ref to parent trie node + */ + protected TokenizerNode parent; + + /* + * node depth in trie, irrespective of each node's token length + */ + protected int nodeDepth; + + /* + * start index of this token in original byte[] + */ + protected int tokenStartOffset; + + /* + * bytes for this trie node. can be length 0 in root node + */ + protected ByteRange token; + + /* + * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for + * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing + * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode. + */ + protected int numOccurrences; + + /* + * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256 + * child nodes. + */ + protected ArrayList children; + + + /* + * Fields used later in the encoding process for sorting the nodes into the order they'll be + * written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer + * are not generic data structures but instead are specific to HBase PrefixTree encoding. + */ + + /* + * unique id assigned to each TokenizerNode + */ + protected long id; + + /* + * set >=0 for nubs and leaves + */ + protected int firstInsertionIndex = -1; + + /* + * A positive value indicating how many bytes before the end of the block this node will start. If + * the section is 55 bytes and negativeOffset is 9, then the node will start at 46. + */ + protected int negativeIndex = 0; + + /* + * The offset in the output array at which to start writing this node's token bytes. Influenced + * by the lengths of all tokens sorted before this one. + */ + protected int outputArrayOffset = -1; + + + /*********************** construct *****************************/ + + public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth, + int tokenStartOffset, int tokenOffset, int tokenLength) { + this.token = new ByteRange(); + reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength); + this.children = Lists.newArrayList(); + } + + /* + * Sub-constructor for initializing all fields without allocating a new object. Used by the + * regular constructor. + */ + public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth, + int tokenStartOffset, int tokenOffset, int tokenLength) { + this.builder = builder; + this.id = builder.nextNodeId(); + this.parent = parent; + this.nodeDepth = nodeDepth; + builder.submitMaxNodeDepthCandidate(nodeDepth); + this.tokenStartOffset = tokenStartOffset; + this.token.set(builder.tokens, tokenOffset, tokenLength); + this.numOccurrences = 1; + } + + /* + * Clear the state of this node so that it looks like it was just allocated. + */ + public void reset() { + builder = null; + parent = null; + nodeDepth = 0; + tokenStartOffset = 0; + token.clear(); + numOccurrences = 0; + children.clear();// branches & nubs + + // ids/offsets. 
used during writing to byte[] + id = 0; + firstInsertionIndex = -1;// set >=0 for nubs and leaves + negativeIndex = 0; + outputArrayOffset = -1; + } + + + /************************* building *********************************/ + + /* + *
  • Only public method used during the tokenization process + *
  • Requires that the input ByteRange sort after the previous, and therefore after all previous + * inputs + *
  • Only looks at bytes of the input array that align with this node's token + */ + public void addSorted(final ByteRange bytes) {// recursively build the tree + + /* + * Recurse deeper into the existing trie structure + */ + if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) { + TokenizerNode lastChild = CollectionUtils.getLast(children); + if (lastChild.partiallyMatchesToken(bytes)) { + lastChild.addSorted(bytes); + return; + } + } + + /* + * Recursion ended. We must either + *
  • 1: increment numOccurrences if this input was equal to the previous + *
  • 2: convert this node from a leaf to a nub, and add a new child leaf + *
  • 3: split this node into a branch and leaf, and then add a second leaf + */ + + // add it as a child of this node + int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length + int tailOffset = tokenStartOffset + numIdenticalTokenBytes; + int tailLength = bytes.getLength() - tailOffset; + + if (numIdenticalTokenBytes == token.getLength()) { + if (tailLength == 0) {// identical to this node (case 1) + incrementNumOccurrences(1); + } else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2) + int childNodeDepth = nodeDepth + 1; + int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes; + TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset, + bytes, tailOffset); + addChild(newChildNode); + } + } else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3) + split(numIdenticalTokenBytes, bytes); + } + } + + + protected void addChild(TokenizerNode node) { + node.setParent(this); + children.add(node); + } + + + /** + * Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the + * method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output + * will be 3 nodes:
    + *
  • 1: B <- branch + *
  • 2: AA <- leaf + *
  • 3: OO <- leaf + * + * @param numTokenBytesToRetain => 1 (the B) + * @param bytes => BOO + */ + protected void split(int numTokenBytesToRetain, final ByteRange bytes) { + int childNodeDepth = nodeDepth; + int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain; + + //create leaf AA + TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset, + token, numTokenBytesToRetain); + firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences + token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B + numOccurrences = 0;//current node is now a branch + + moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B) + addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children + + //create leaf OO + TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset, + bytes, tokenStartOffset + numTokenBytesToRetain); + addChild(secondChild);//add the new leaf (00) to the branch's (B's) children + + // we inserted branch node B as a new level above/before the two children, so increment the + // depths of the children below + firstChild.incrementNodeDepthRecursively(); + secondChild.incrementNodeDepthRecursively(); + } + + + protected void incrementNodeDepthRecursively() { + ++nodeDepth; + builder.submitMaxNodeDepthCandidate(nodeDepth); + for (int i = 0; i < children.size(); ++i) { + children.get(i).incrementNodeDepthRecursively(); + } + } + + + protected void moveChildrenToDifferentParent(TokenizerNode newParent) { + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.setParent(newParent); + newParent.children.add(child); + } + children.clear(); + } + + + /************************ byte[] utils *************************/ + + protected boolean partiallyMatchesToken(ByteRange bytes) { + return numIdenticalBytes(bytes) > 0; + } + + protected boolean matchesToken(ByteRange bytes) { + return numIdenticalBytes(bytes) == getTokenLength(); + } + + protected int numIdenticalBytes(ByteRange bytes) { + return token.numEqualPrefixBytes(bytes, tokenStartOffset); + } + + + /***************** moving nodes around ************************/ + + public void appendNodesToExternalList(List appendTo, boolean includeNonLeaves, + boolean includeLeaves) { + if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) { + appendTo.add(this); + } + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves); + } + } + + public int setInsertionIndexes(int nextIndex) { + int newNextIndex = nextIndex; + if (hasOccurrences()) { + setFirstInsertionIndex(nextIndex); + newNextIndex += numOccurrences; + } + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + newNextIndex = child.setInsertionIndexes(newNextIndex); + } + return newNextIndex; + } + + public void appendOutputArrayOffsets(List offsets) { + if (hasOccurrences()) { + offsets.add(outputArrayOffset); + } + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.appendOutputArrayOffsets(offsets); + } + } + + + /***************** searching *********************************/ + + /* + * Do a trie style search through the tokenizer. One option for looking up families or qualifiers + * during encoding, but currently unused in favor of tracking this information as they are added. 
+ * + * Keeping code pending further performance testing. + */ + public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, + int keyLength) { + int thisNodeDepthPlusLength = tokenStartOffset + token.getLength(); + + // quick check if the key is shorter than this node (may not work for binary search) + if (CollectionUtils.isEmpty(children)) { + if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + } + + // all token bytes must match + for (int i = 0; i < token.getLength(); ++i) { + if (key[tokenStartOffset + keyOffset + i] != token.get(i)) { + // TODO return whether it's before or after so we can binary search + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + } + + if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) { + resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH + return; + } + + if (CollectionUtils.notEmpty(children)) { + // TODO binary search the children + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.getNode(resultHolder, key, keyOffset, keyLength); + if (resultHolder.isMatch()) { + return; + } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) { + // passed it, so it doesn't exist + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + // key is still AFTER the current node, so continue searching + } + } + + // checked all children (or there were no children), and didn't find it + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + + + /****************** writing back to byte[]'s *************************/ + + public byte[] getNewByteArray() { + byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()]; + fillInBytes(arrayToFill); + return arrayToFill; + } + + public void fillInBytes(byte[] arrayToFill) { + for (int i = 0; i < token.getLength(); ++i) { + arrayToFill[tokenStartOffset + i] = token.get(i); + } + if (parent != null) { + parent.fillInBytes(arrayToFill); + } + } + + + /************************** printing ***********************/ + + @Override + public String toString() { + String s = ""; + if (parent == null) { + s += "R "; + } else { + s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray()); + } + s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]"; + if (numOccurrences > 0) { + s += "x" + numOccurrences; + } + return s; + } + + public String getPaddedTokenAndOccurrenceString() { + StringBuilder sb = new StringBuilder(); + sb.append(getBnlIndicator(true)); + sb.append(Strings.padFront(numOccurrences + "", ' ', 3)); + sb.append(Strings.padFront(nodeDepth + "", ' ', 3)); + if (outputArrayOffset >= 0) { + sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3)); + } + sb.append(" "); + for (int i = 0; i < tokenStartOffset; ++i) { + sb.append(" "); + } + sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_")); + return sb.toString(); + } + + public String getBnlIndicator(boolean indent) { + if (indent) { + if (isNub()) { + return " N "; + } + return isBranch() ? "B " : " L"; + } + if (isNub()) { + return "N"; + } + return isBranch() ? "B" : "L"; + } + + + /********************** count different node types ********************/ + + public int getNumBranchNodesIncludingThisNode() { + if (isLeaf()) { + return 0; + } + int totalFromThisPlusChildren = isBranch() ? 
1 : 0; + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode(); + } + return totalFromThisPlusChildren; + } + + public int getNumNubNodesIncludingThisNode() { + if (isLeaf()) { + return 0; + } + int totalFromThisPlusChildren = isNub() ? 1 : 0; + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode(); + } + return totalFromThisPlusChildren; + } + + public int getNumLeafNodesIncludingThisNode() { + if (isLeaf()) { + return 1; + } + int totalFromChildren = 0; + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + totalFromChildren += child.getNumLeafNodesIncludingThisNode(); + } + return totalFromChildren; + } + + + /*********************** simple read-only methods *******************************/ + + public int getNodeDepth() { + return nodeDepth; + } + + public int getTokenLength() { + return token.getLength(); + } + + public boolean hasOccurrences() { + return numOccurrences > 0; + } + + public boolean isRoot() { + return this.parent == null; + } + + public int getNumChildren() { + return CollectionUtils.nullSafeSize(children); + } + + public TokenizerNode getLastChild() { + if (CollectionUtils.isEmpty(children)) { + return null; + } + return CollectionUtils.getLast(children); + } + + public boolean isLeaf() { + return CollectionUtils.isEmpty(children) && hasOccurrences(); + } + + public boolean isBranch() { + return CollectionUtils.notEmpty(children) && !hasOccurrences(); + } + + public boolean isNub() { + return CollectionUtils.notEmpty(children) && hasOccurrences(); + } + + + /********************** simple mutation methods *************************/ + + /** + * Each occurrence > 1 indicates a repeat of the previous entry. This can be called directly by + * an external class without going through the process of detecting a repeat if it is a known + * repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows + * the new cells are part of the current row. 
+ * @param d increment by this amount + */ + public void incrementNumOccurrences(int d) { + numOccurrences += d; + } + + + /************************* autogenerated get/set ******************/ + + public int getTokenOffset() { + return tokenStartOffset; + } + + public TokenizerNode getParent() { + return parent; + } + + public ByteRange getToken() { + return token; + } + + public int getNumOccurrences() { + return numOccurrences; + } + + public void setParent(TokenizerNode parent) { + this.parent = parent; + } + + public void setNumOccurrences(int numOccurrences) { + this.numOccurrences = numOccurrences; + } + + public ArrayList getChildren() { + return children; + } + + public long getId() { + return id; + } + + public int getFirstInsertionIndex() { + return firstInsertionIndex; + } + + public void setFirstInsertionIndex(int firstInsertionIndex) { + this.firstInsertionIndex = firstInsertionIndex; + } + + public int getNegativeIndex() { + return negativeIndex; + } + + public void setNegativeIndex(int negativeIndex) { + this.negativeIndex = negativeIndex; + } + + public int getOutputArrayOffset() { + return outputArrayOffset; + } + + public void setOutputArrayOffset(int outputArrayOffset) { + this.outputArrayOffset = outputArrayOffset; + } + + public void setId(long id) { + this.id = id; + } + + public void setBuilder(Tokenizer builder) { + this.builder = builder; + } + + public void setTokenOffset(int tokenOffset) { + this.tokenStartOffset = tokenOffset; + } + + public void setToken(ByteRange token) { + this.token = token; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java new file mode 100644 index 0000000..37909d9 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * Warning: currently unused, but code is valid. Pending performance testing on more data sets. + * + * Where is the key relative to our current position in the tree. 
For example, the current tree node + * is "BEFORE" the key we are seeking + */ +@InterfaceAudience.Private +public enum TokenizerRowSearchPosition { + + AFTER,//the key is after this tree node, so keep searching + BEFORE,//in a binary search, this tells us to back up + MATCH,//the current node is a full match + NO_MATCH,//might as well return a value more informative than null + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java new file mode 100644 index 0000000..2d22c50 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * for recursively searching a PtBuilder + */ +@InterfaceAudience.Private +public class TokenizerRowSearchResult{ + + /************ fields ************************/ + + protected TokenizerRowSearchPosition difference; + protected TokenizerNode matchingNode; + + + /*************** construct *****************/ + + public TokenizerRowSearchResult() { + } + + public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) { + this.difference = difference; + } + + public TokenizerRowSearchResult(TokenizerNode matchingNode) { + this.difference = TokenizerRowSearchPosition.MATCH; + this.matchingNode = matchingNode; + } + + + /*************** methods **********************/ + + public boolean isMatch() { + return TokenizerRowSearchPosition.MATCH == difference; + } + + + /************* get/set ***************************/ + + public TokenizerRowSearchPosition getDifference() { + return difference; + } + + public TokenizerNode getMatchingNode() { + return matchingNode; + } + + public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) { + this.difference = difference; + this.matchingNode = matchingNode; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellScannerPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellScannerPosition.java new file mode 100644 index 0000000..f820258 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellScannerPosition.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.scanner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An indicator of the state of the scanner after an operation such as nextCell() or + * positionAt(..). For example: + *
      + *
    • In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that + * it should load the next block. + *
    • In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted. + *
    • In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the + * next region. + *
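A hypothetical caller might branch on the returned position as sketched below; searcher, key, processCurrentCell and loadNextBlock are stand-ins, and positionAtOrAfter is the CellSearcher method declared later in this patch.

```java
// Sketch only: react to the position reported after a seek-style call.
CellScannerPosition pos = searcher.positionAtOrAfter(key);
switch (pos) {
  case AT:          // exact match; the scanner is positioned on the requested cell
  case AFTER:       // no exact match; the scanner is on the nearest cell after the key
    processCurrentCell(searcher);   // hypothetical helper that reads the scanner's current cell
    break;
  case AFTER_LAST:  // ran past the end of this scanner's scope, e.g. time to load the next block
    loadNextBlock();                // hypothetical helper
    break;
  default:          // BEFORE / BEFORE_FIRST are documented for calls such as positionAtOrBefore
    break;
}
```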
    + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum CellScannerPosition { + + /** + * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first + * cell. + */ + BEFORE_FIRST, + + /** + * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), + * rather it is the nearest cell before the requested cell. + */ + BEFORE, + + /** + * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by + * positionAt(..). + */ + AT, + + /** + * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), + * rather it is the nearest cell after the requested cell. + */ + AFTER, + + /** + * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect. + */ + AFTER_LAST + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellSearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellSearcher.java new file mode 100644 index 0000000..b0f1e27 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/CellSearcher.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.scanner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; + +/** + * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that + * the implementation is able to navigate between cells without iterating through every cell. + */ +@InterfaceAudience.Private +public interface CellSearcher extends ReversibleCellScanner { + /** + * Reset any state in the scanner so it appears it was freshly opened. + */ + void resetToBeforeFirstEntry(); + + /** + * Do everything within this scanner's power to find the key. Look forward and backwards. + *

    + * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state. + *

    + * @param key position the CellScanner exactly on this key + * @return true if the cell existed and getCurrentCell() holds a valid cell + */ + boolean positionAt(Cell key); + + /** + * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no + * exact match. + *

    + * @param key position the CellScanner on this key or the closest cell before + * @return AT if exact match
    + * BEFORE if on last cell before key
    + * BEFORE_FIRST if key was before the first cell in this scanner's scope + */ + CellScannerPosition positionAtOrBefore(Cell key); + + /** + * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact + * match. + *

    + * @param key position the CellScanner on this key or the closest cell after + * @return AT if exact match
    + * AFTER if on first cell after key
    + * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition positionAtOrAfter(Cell key); + + /** + * Note: Added for backwards compatibility with + * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek} + *

    + * Look for the key, but only look after the current position. Probably not needed for an + * efficient tree implementation, but is important for implementations without random access such + * as unencoded KeyValue blocks. + *

    + * @param key position the CellScanner exactly on this key + * @return true if getCurrent() holds a valid cell + */ + boolean seekForwardTo(Cell key); + + /** + * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no + * exact match. + *

    + * @param key + * @return AT if exact match
    + * AFTER if on first cell after key
    + * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition seekForwardToOrBefore(Cell key); + + /** + * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no + * exact match. + *

    + * @param key + * @return AT if exact match
    + * AFTER if on first cell after key
    + * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition seekForwardToOrAfter(Cell key); + + /** + * Note: This may not be appropriate to have in the interface. Need to investigate. + *

    + * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST. + * This is used by tests and for handling certain edge cases. + */ + void positionAfterLastCell(); + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java new file mode 100644 index 0000000..bb83ac5 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.scanner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.CellScanner; + +/** + * An extension of CellScanner indicating the scanner supports iterating backwards through cells. + *

    + * Note: This was not added to suggest that HBase should support client facing reverse Scanners, + * but + * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing + * up if the positionAt(..) method goes past the requested cell. + */ +@InterfaceAudience.Private +public interface ReversibleCellScanner extends CellScanner { + + /** + * Try to position the scanner one Cell before the current position. + * @return true if the operation was successful, meaning getCurrentCell() will return a valid + * Cell.
    + * false if there were no previous cells, meaning getCurrentCell() will return null. + * Scanner position will be + * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST} + */ + boolean previous(); + + /** + * Try to position the scanner in the row before the current row. + * @param endOfRow true for the last cell in the previous row; false for the first cell + * @return true if the operation was successful, meaning getCurrentCell() will return a valid + * Cell.
    + * false if there were no previous cells, meaning getCurrentCell() will return null. + * Scanner position will be + * {@link org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST} + */ + boolean previousRow(boolean endOfRow); +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/ByteRangeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/ByteRangeSet.java new file mode 100644 index 0000000..570d489 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/ByteRangeSet.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.byterange; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +/** + * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted + * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage. + *

    + * Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and + * {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a + * trie-oriented ByteRangeTrieSet, etc + */ +@InterfaceAudience.Private +public abstract class ByteRangeSet { + + /******************** fields **********************/ + + protected byte[] byteAppender; + protected int numBytes; + + protected Map uniqueIndexByUniqueRange; + + protected ArrayList uniqueRanges; + protected int numUniqueRanges = 0; + + protected int[] uniqueRangeIndexByInsertionId; + protected int numInputs; + + protected List sortedIndexByUniqueIndex; + protected int[] sortedIndexByInsertionId; + protected ArrayList sortedRanges; + + + /****************** construct **********************/ + + protected ByteRangeSet() { + this.byteAppender = new byte[0]; + this.uniqueRanges = Lists.newArrayList(); + this.uniqueRangeIndexByInsertionId = new int[0]; + this.sortedIndexByUniqueIndex = Lists.newArrayList(); + this.sortedIndexByInsertionId = new int[0]; + this.sortedRanges = Lists.newArrayList(); + } + + public void reset() { + numBytes = 0; + uniqueIndexByUniqueRange.clear(); + numUniqueRanges = 0; + numInputs = 0; + sortedIndexByUniqueIndex.clear(); + sortedRanges.clear(); + } + + + /*************** abstract *************************/ + + public abstract void addToSortedRanges(); + + + /**************** methods *************************/ + + /** + * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and + * insert it into the tracking Map uniqueIndexByUniqueRange. + */ + public void add(ByteRange bytes) { + Integer index = uniqueIndexByUniqueRange.get(bytes); + if (index == null) { + index = store(bytes); + } + int minLength = numInputs + 1; + uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId, + minLength, 2 * minLength); + uniqueRangeIndexByInsertionId[numInputs] = index; + ++numInputs; + } + + protected int store(ByteRange bytes) { + int indexOfNewElement = numUniqueRanges; + if (uniqueRanges.size() <= numUniqueRanges) { + uniqueRanges.add(new ByteRange()); + } + ByteRange storedRange = uniqueRanges.get(numUniqueRanges); + int neededBytes = numBytes + bytes.getLength(); + byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes); + bytes.deepCopyTo(byteAppender, numBytes); + storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet + numBytes += bytes.getLength(); + uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement); + int newestUniqueIndex = numUniqueRanges; + ++numUniqueRanges; + return newestUniqueIndex; + } + + public ByteRangeSet compile() { + addToSortedRanges(); + for (int i = 0; i < sortedRanges.size(); ++i) { + sortedIndexByUniqueIndex.add(null);// need to grow the size + } + // TODO move this to an invert(int[]) util method + for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) { + int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i)); + sortedIndexByUniqueIndex.set(uniqueIndex, i); + } + sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs, + numInputs); + for (int i = 0; i < numInputs; ++i) { + int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i]; + int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex); + sortedIndexByInsertionId[i] = sortedIndex; + } + return this; + } + + public int getSortedIndexForInsertionId(int insertionId) { + return 
sortedIndexByInsertionId[insertionId]; + } + + public int size() { + return uniqueIndexByUniqueRange.size(); + } + + + /***************** standard methods ************************/ + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + int i = 0; + for (ByteRange r : sortedRanges) { + if (i > 0) { + sb.append("\n"); + } + sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray())); + ++i; + } + sb.append("\ntotalSize:" + numBytes); + sb.append("\navgSize:" + getAvgSize()); + return sb.toString(); + } + + + /**************** get/set *****************************/ + + public ArrayList getSortedRanges() { + return sortedRanges; + } + + public long getAvgSize() { + return numBytes / numUniqueRanges; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeHashSet.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeHashSet.java new file mode 100644 index 0000000..ef14a68 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeHashSet.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.byterange.impl; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hadoop.hbase.util.byterange.ByteRangeSet; + +/** + * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces + * garbage when adding a new element to it. We can probably create a tighter implementation without + * pointers or garbage. 
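As a rough illustration of the add/compile flow shared by the ByteRangeSet implementations, here is a sketch (not part of this patch; the qualifier strings are hypothetical, and ByteRange.set(byte[], int, int) is used the same way as elsewhere in this patch).

```java
// Hypothetical sketch: de-dupe three qualifiers and map insertion order to sorted order.
ByteRangeSet set = new ByteRangeHashSet();
for (String qualifier : new String[] { "qualB", "qualA", "qualB" }) {  // insertionIds 0, 1, 2
  byte[] bytes = Bytes.toBytes(qualifier);
  ByteRange range = new ByteRange();
  range.set(bytes, 0, bytes.length);
  set.add(range);                   // duplicate content is stored only once in the shared byte[]
}
set.compile();                      // sorts the unique ranges and builds the index translation tables
int sorted0 = set.getSortedIndexForInsertionId(0);  // 1, because "qualB" sorts after "qualA"
int numUnique = set.size();                         // 2 unique ranges
```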
+ */ +@InterfaceAudience.Private +public class ByteRangeHashSet extends ByteRangeSet { + + /************************ constructors *****************************/ + + public ByteRangeHashSet() { + this.uniqueIndexByUniqueRange = new HashMap(); + } + + public ByteRangeHashSet(List rawByteArrays) { + for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) { + add(in); + } + } + + @Override + public void addToSortedRanges() { + sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet())); + Collections.sort(sortedRanges); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeTreeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeTreeSet.java new file mode 100644 index 0000000..eb86bc9 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/byterange/impl/ByteRangeTreeSet.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.byterange.impl; + +import java.util.List; +import java.util.TreeMap; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hadoop.hbase.util.byterange.ByteRangeSet; + +/** + * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet. + */ +@InterfaceAudience.Private +public class ByteRangeTreeSet extends ByteRangeSet { + + /************************ constructors *****************************/ + + public ByteRangeTreeSet() { + this.uniqueIndexByUniqueRange = new TreeMap(); + } + + public ByteRangeTreeSet(List rawByteArrays) { + this();//needed to initialize the TreeSet + for(ByteRange in : IterableUtils.nullSafe(rawByteArrays)){ + add(in); + } + } + + @Override + public void addToSortedRanges() { + sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet())); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UFIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UFIntTool.java new file mode 100644 index 0000000..bec5deb --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UFIntTool.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.vint; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * UFInt is an abbreviation for Unsigned Fixed-width Integer. + * + * This class converts between positive ints and 1-4 bytes that represent the int. All input ints + * must be positive. Max values stored in N bytes are: + * + * N=1: 2^8 => 256 + * N=2: 2^16 => 65,536 + * N=3: 2^24 => 16,777,216 + * N=4: 2^31 => 2,147,483,648 (Integer.MAX_VALUE) + * + * This was created to get most of the memory savings of a variable length integer when encoding + * an array of input integers, but to fix the number of bytes for each integer to the number needed + * to store the maximum integer in the array. This enables a binary search to be performed on the + * array of encoded integers. + * + * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if + * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the + * numbers will also require 2 bytes. + * + * warnings: + * * no input validation for max performance + * * no negatives + */ +@InterfaceAudience.Private +public class UFIntTool { + + private static final int NUM_BITS_IN_LONG = 64; + + public static long maxValueForNumBytes(int numBytes) { + return (1L << (numBytes * 8)) - 1; + } + + public static int numBytes(final long value) { + if (value == 0) {// 0 doesn't work with the formula below + return 1; + } + return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8; + } + + public static byte[] getBytes(int outputWidth, final long value) { + byte[] bytes = new byte[outputWidth]; + writeBytes(outputWidth, value, bytes, 0); + return bytes; + } + + public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) { + bytes[offset + outputWidth - 1] = (byte) value; + for (int i = outputWidth - 2; i >= 0; --i) { + bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8); + } + } + + private static final long[] MASKS = new long[] { + (long) 255, + (long) 255 << 8, + (long) 255 << 16, + (long) 255 << 24, + (long) 255 << 32, + (long) 255 << 40, + (long) 255 << 48, + (long) 255 << 56 + }; + + public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException { + for (int i = outputWidth - 1; i >= 0; --i) { + os.write((byte) ((value & MASKS[i]) >>> (8 * i))); + } + } + + public static long fromBytes(final byte[] bytes) { + long value = 0; + value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < bytes.length; ++i) { + value <<= 8; + value |= bytes[i] & 0xff; + } + return value; + } + + public static long fromBytes(final byte[] bytes, final int offset, final int width) { + long value = 0; + value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < width; 
++i) { + value <<= 8; + value |= bytes[i + offset] & 0xff; + } + return value; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVIntTool.java new file mode 100644 index 0000000..ac22684 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVIntTool.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.vint; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left + * bit of the current byte is 1, then there is at least one more byte. + */ +@InterfaceAudience.Private +public class UVIntTool { + + public static final byte + BYTE_7_RIGHT_BITS_SET = 127, + BYTE_LEFT_BIT_SET = -128; + + public static final long + INT_7_RIGHT_BITS_SET = 127, + INT_8TH_BIT_SET = 128; + + public static final byte[] + MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 }; + + /********************* int -> bytes **************************/ + + public static int numBytes(int in) { + if (in == 0) { + // doesn't work with the formula below + return 1; + } + return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1) + } + + public static byte[] getBytes(int value) { + int numBytes = numBytes(value); + byte[] bytes = new byte[numBytes]; + int remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + // set the left bit + bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET); + remainder >>= 7; + } + // do not set the left bit + bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET); + return bytes; + } + + public static int writeBytes(int value, OutputStream os) throws IOException { + int numBytes = numBytes(value); + int remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + // set the left bit + os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET)); + remainder >>= 7; + } + // do not set the left bit + os.write((byte) (remainder & INT_7_RIGHT_BITS_SET)); + return numBytes; + } + + /******************** bytes -> int **************************/ + + public static int getInt(byte[] bytes) { + return getInt(bytes, 0); + } + + public static int getInt(byte[] bytes, int offset) { + int value = 0; + for (int i = 0;; ++i) { + byte b = bytes[offset + i]; + int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + if (b >= 0) { + break; + } + } + return value; + } + + public static int getInt(InputStream is) throws IOException { + int value = 0; + int 
i = 0; + int b; + do{ + b = is.read(); + int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + ++i; + }while(b > Byte.MAX_VALUE); + return value; + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVLongTool.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVLongTool.java new file mode 100644 index 0000000..57dba17 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/util/vint/UVLongTool.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.vint; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left + * bit of the current byte is 1, then there is at least one more byte. + */ +@InterfaceAudience.Private +public class UVLongTool{ + + public static final byte + BYTE_7_RIGHT_BITS_SET = 127, + BYTE_LEFT_BIT_SET = -128; + + public static final long + LONG_7_RIGHT_BITS_SET = 127, + LONG_8TH_BIT_SET = 128; + + public static final byte[] + MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 }; + + + /********************* long -> bytes **************************/ + + public static int numBytes(long in) {// do a check for illegal arguments if not protected + if (in == 0) { + return 1; + }// doesn't work with the formula below + return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1) + } + + public static byte[] getBytes(long value) { + int numBytes = numBytes(value); + byte[] bytes = new byte[numBytes]; + long remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit + remainder >>= 7; + } + bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit + return bytes; + } + + public static int writeBytes(long value, OutputStream os) throws IOException { + int numBytes = numBytes(value); + long remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + // set the left bit + os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET)); + remainder >>= 7; + } + // do not set the left bit + os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET)); + return numBytes; + } + + /******************** bytes -> long **************************/ + + public static long getLong(byte[] bytes) { + return getLong(bytes, 0); + } + + public static long getLong(byte[] bytes, int offset) { + long value = 0; + for (int i = 0;; ++i) { + byte b = bytes[offset + i]; + long shifted = BYTE_7_RIGHT_BITS_SET & b;// 
kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + if (b >= 0) { + break; + }// first bit was 0, so that's the last byte in the VarLong + } + return value; + } + + public static long getLong(InputStream is) throws IOException { + long value = 0; + int i = 0; + int b; + do { + b = is.read(); + long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + ++i; + } while (b > Byte.MAX_VALUE); + return value; + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java deleted file mode 100644 index a696121..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java +++ /dev/null @@ -1,841 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder; -import org.apache.hbase.util.vint.UVIntTool; -import org.apache.hbase.util.vint.UVLongTool; - -/** - * Information about the block. Stored at the beginning of the byte[]. Contains things - * like minimum timestamp and width of FInts in the row tree. - * - * Most fields stored in VInts that get decoded on the first access of each new block. 
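For illustration only (not part of this patch): a minimal round-trip of the UVInt format described above, using only the UVIntTool methods added by this change under org.apache.hadoop.hbase.util.vint. Values up to 127 fit in one byte; 128 and above spill into extra bytes with the leftmost bit set, and a stream decode recovers the original value.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.hbase.util.vint.UVIntTool;

public class UVIntRoundTripExample {
  public static void main(String[] args) throws IOException {
    int[] samples = { 0, 1, 127, 128, 300, Integer.MAX_VALUE };
    for (int value : samples) {
      ByteArrayOutputStream os = new ByteArrayOutputStream();
      // leftmost bit is set on every byte except the last one
      int numBytes = UVIntTool.writeBytes(value, os);
      int decoded = UVIntTool.getInt(new ByteArrayInputStream(os.toByteArray()));
      System.out.println(value + " -> " + numBytes + " byte(s), decoded back to " + decoded);
    }
  }
}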
- */ -@InterfaceAudience.Private -public class PrefixTreeBlockMeta { - - /******************* static fields ********************/ - - public static final int VERSION = 0; - - public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue - - public static final int - NUM_LONGS = 2, - NUM_INTS = 22, - NUM_SHORTS = 0,//keyValueTypeWidth not persisted - NUM_SINGLE_BYTES = 2, - MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS - + Bytes.SIZEOF_SHORT * NUM_SHORTS - + Bytes.SIZEOF_INT * NUM_INTS - + NUM_SINGLE_BYTES; - - - /**************** transient fields *********************/ - - protected int arrayOffset; - protected int bufferOffset; - - - /**************** persisted fields **********************/ - - // PrefixTree version to allow future format modifications - protected int version; - protected int numMetaBytes; - protected int numKeyValueBytes; - protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte - - // split the byte[] into 6 sections for the different data types - protected int numRowBytes; - protected int numFamilyBytes; - protected int numQualifierBytes; - protected int numTimestampBytes; - protected int numMvccVersionBytes; - protected int numValueBytes; - - // number of bytes in each section of fixed width FInts - protected int nextNodeOffsetWidth; - protected int familyOffsetWidth; - protected int qualifierOffsetWidth; - protected int timestampIndexWidth; - protected int mvccVersionIndexWidth; - protected int valueOffsetWidth; - protected int valueLengthWidth; - - // used to pre-allocate structures for reading - protected int rowTreeDepth; - protected int maxRowLength; - protected int maxQualifierLength; - - // the timestamp from which the deltas are calculated - protected long minTimestamp; - protected int timestampDeltaWidth; - protected long minMvccVersion; - protected int mvccVersionDeltaWidth; - - protected boolean allSameType; - protected byte allTypes; - - protected int numUniqueRows; - protected int numUniqueFamilies; - protected int numUniqueQualifiers; - - - /***************** constructors ********************/ - - public PrefixTreeBlockMeta() { - } - - public PrefixTreeBlockMeta(InputStream is) throws IOException{ - this.version = VERSION; - this.arrayOffset = 0; - this.bufferOffset = 0; - readVariableBytesFromInputStream(is); - } - - /** - * @param buffer positioned at start of PtBlockMeta - */ - public PrefixTreeBlockMeta(ByteBuffer buffer) { - initOnBlock(buffer); - } - - public void initOnBlock(ByteBuffer buffer) { - arrayOffset = buffer.arrayOffset(); - bufferOffset = buffer.position(); - readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset); - } - - - /**************** operate on each field **********************/ - - public int calculateNumMetaBytes(){ - int numBytes = 0; - numBytes += UVIntTool.numBytes(version); - numBytes += UVLongTool.numBytes(numMetaBytes); - numBytes += UVIntTool.numBytes(numKeyValueBytes); - ++numBytes;//os.write(getIncludesMvccVersion()); - - numBytes += UVIntTool.numBytes(numRowBytes); - numBytes += UVIntTool.numBytes(numFamilyBytes); - numBytes += UVIntTool.numBytes(numQualifierBytes); - numBytes += UVIntTool.numBytes(numTimestampBytes); - numBytes += UVIntTool.numBytes(numMvccVersionBytes); - numBytes += UVIntTool.numBytes(numValueBytes); - - numBytes += UVIntTool.numBytes(nextNodeOffsetWidth); - numBytes += UVIntTool.numBytes(familyOffsetWidth); - numBytes += UVIntTool.numBytes(qualifierOffsetWidth); - numBytes += UVIntTool.numBytes(timestampIndexWidth); - 
numBytes += UVIntTool.numBytes(mvccVersionIndexWidth); - numBytes += UVIntTool.numBytes(valueOffsetWidth); - numBytes += UVIntTool.numBytes(valueLengthWidth); - - numBytes += UVIntTool.numBytes(rowTreeDepth); - numBytes += UVIntTool.numBytes(maxRowLength); - numBytes += UVIntTool.numBytes(maxQualifierLength); - - numBytes += UVLongTool.numBytes(minTimestamp); - numBytes += UVIntTool.numBytes(timestampDeltaWidth); - numBytes += UVLongTool.numBytes(minMvccVersion); - numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth); - ++numBytes;//os.write(getAllSameTypeByte()); - ++numBytes;//os.write(allTypes); - - numBytes += UVIntTool.numBytes(numUniqueRows); - numBytes += UVIntTool.numBytes(numUniqueFamilies); - numBytes += UVIntTool.numBytes(numUniqueQualifiers); - return numBytes; - } - - public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{ - UVIntTool.writeBytes(version, os); - UVIntTool.writeBytes(numMetaBytes, os); - UVIntTool.writeBytes(numKeyValueBytes, os); - os.write(getIncludesMvccVersionByte()); - - UVIntTool.writeBytes(numRowBytes, os); - UVIntTool.writeBytes(numFamilyBytes, os); - UVIntTool.writeBytes(numQualifierBytes, os); - UVIntTool.writeBytes(numTimestampBytes, os); - UVIntTool.writeBytes(numMvccVersionBytes, os); - UVIntTool.writeBytes(numValueBytes, os); - - UVIntTool.writeBytes(nextNodeOffsetWidth, os); - UVIntTool.writeBytes(familyOffsetWidth, os); - UVIntTool.writeBytes(qualifierOffsetWidth, os); - UVIntTool.writeBytes(timestampIndexWidth, os); - UVIntTool.writeBytes(mvccVersionIndexWidth, os); - UVIntTool.writeBytes(valueOffsetWidth, os); - UVIntTool.writeBytes(valueLengthWidth, os); - - UVIntTool.writeBytes(rowTreeDepth, os); - UVIntTool.writeBytes(maxRowLength, os); - UVIntTool.writeBytes(maxQualifierLength, os); - - UVLongTool.writeBytes(minTimestamp, os); - UVIntTool.writeBytes(timestampDeltaWidth, os); - UVLongTool.writeBytes(minMvccVersion, os); - UVIntTool.writeBytes(mvccVersionDeltaWidth, os); - os.write(getAllSameTypeByte()); - os.write(allTypes); - - UVIntTool.writeBytes(numUniqueRows, os); - UVIntTool.writeBytes(numUniqueFamilies, os); - UVIntTool.writeBytes(numUniqueQualifiers, os); - } - - public void readVariableBytesFromInputStream(InputStream is) throws IOException{ - version = UVIntTool.getInt(is); - numMetaBytes = UVIntTool.getInt(is); - numKeyValueBytes = UVIntTool.getInt(is); - setIncludesMvccVersion((byte) is.read()); - - numRowBytes = UVIntTool.getInt(is); - numFamilyBytes = UVIntTool.getInt(is); - numQualifierBytes = UVIntTool.getInt(is); - numTimestampBytes = UVIntTool.getInt(is); - numMvccVersionBytes = UVIntTool.getInt(is); - numValueBytes = UVIntTool.getInt(is); - - nextNodeOffsetWidth = UVIntTool.getInt(is); - familyOffsetWidth = UVIntTool.getInt(is); - qualifierOffsetWidth = UVIntTool.getInt(is); - timestampIndexWidth = UVIntTool.getInt(is); - mvccVersionIndexWidth = UVIntTool.getInt(is); - valueOffsetWidth = UVIntTool.getInt(is); - valueLengthWidth = UVIntTool.getInt(is); - - rowTreeDepth = UVIntTool.getInt(is); - maxRowLength = UVIntTool.getInt(is); - maxQualifierLength = UVIntTool.getInt(is); - - minTimestamp = UVLongTool.getLong(is); - timestampDeltaWidth = UVIntTool.getInt(is); - minMvccVersion = UVLongTool.getLong(is); - mvccVersionDeltaWidth = UVIntTool.getInt(is); - - setAllSameType((byte) is.read()); - allTypes = (byte) is.read(); - - numUniqueRows = UVIntTool.getInt(is); - numUniqueFamilies = UVIntTool.getInt(is); - numUniqueQualifiers = UVIntTool.getInt(is); - } - - public void 
readVariableBytesFromArray(byte[] bytes, int offset) { - int position = offset; - - version = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(version); - numMetaBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numMetaBytes); - numKeyValueBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numKeyValueBytes); - setIncludesMvccVersion(bytes[position]); - ++position; - - numRowBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numRowBytes); - numFamilyBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numFamilyBytes); - numQualifierBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numQualifierBytes); - numTimestampBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numTimestampBytes); - numMvccVersionBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numMvccVersionBytes); - numValueBytes = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numValueBytes); - - nextNodeOffsetWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(nextNodeOffsetWidth); - familyOffsetWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(familyOffsetWidth); - qualifierOffsetWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(qualifierOffsetWidth); - timestampIndexWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(timestampIndexWidth); - mvccVersionIndexWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(mvccVersionIndexWidth); - valueOffsetWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(valueOffsetWidth); - valueLengthWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(valueLengthWidth); - - rowTreeDepth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(rowTreeDepth); - maxRowLength = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(maxRowLength); - maxQualifierLength = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(maxQualifierLength); - - minTimestamp = UVLongTool.getLong(bytes, position); - position += UVLongTool.numBytes(minTimestamp); - timestampDeltaWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(timestampDeltaWidth); - minMvccVersion = UVLongTool.getLong(bytes, position); - position += UVLongTool.numBytes(minMvccVersion); - mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(mvccVersionDeltaWidth); - - setAllSameType(bytes[position]); - ++position; - allTypes = bytes[position]; - ++position; - - numUniqueRows = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numUniqueRows); - numUniqueFamilies = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numUniqueFamilies); - numUniqueQualifiers = UVIntTool.getInt(bytes, position); - position += UVIntTool.numBytes(numUniqueQualifiers); - } - - //TODO method that can read directly from ByteBuffer instead of InputStream - - - /*************** methods *************************/ - - public int getKeyValueTypeWidth() { - return allSameType ? 0 : 1; - } - - public byte getIncludesMvccVersionByte() { - return includesMvccVersion ? 
(byte) 1 : (byte) 0; - } - - public void setIncludesMvccVersion(byte includesMvccVersionByte) { - includesMvccVersion = includesMvccVersionByte != 0; - } - - public byte getAllSameTypeByte() { - return allSameType ? (byte) 1 : (byte) 0; - } - - public void setAllSameType(byte allSameTypeByte) { - allSameType = allSameTypeByte != 0; - } - - public boolean isAllSameTimestamp() { - return timestampIndexWidth == 0; - } - - public boolean isAllSameMvccVersion() { - return mvccVersionIndexWidth == 0; - } - - public void setTimestampFields(LongEncoder encoder){ - this.minTimestamp = encoder.getMin(); - this.timestampIndexWidth = encoder.getBytesPerIndex(); - this.timestampDeltaWidth = encoder.getBytesPerDelta(); - this.numTimestampBytes = encoder.getTotalCompressedBytes(); - } - - public void setMvccVersionFields(LongEncoder encoder){ - this.minMvccVersion = encoder.getMin(); - this.mvccVersionIndexWidth = encoder.getBytesPerIndex(); - this.mvccVersionDeltaWidth = encoder.getBytesPerDelta(); - this.numMvccVersionBytes = encoder.getTotalCompressedBytes(); - } - - - /*************** Object methods *************************/ - - /** - * Generated by Eclipse - */ - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj; - if (allSameType != other.allSameType) - return false; - if (allTypes != other.allTypes) - return false; - if (arrayOffset != other.arrayOffset) - return false; - if (bufferOffset != other.bufferOffset) - return false; - if (valueLengthWidth != other.valueLengthWidth) - return false; - if (valueOffsetWidth != other.valueOffsetWidth) - return false; - if (familyOffsetWidth != other.familyOffsetWidth) - return false; - if (includesMvccVersion != other.includesMvccVersion) - return false; - if (maxQualifierLength != other.maxQualifierLength) - return false; - if (maxRowLength != other.maxRowLength) - return false; - if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth) - return false; - if (mvccVersionIndexWidth != other.mvccVersionIndexWidth) - return false; - if (minMvccVersion != other.minMvccVersion) - return false; - if (minTimestamp != other.minTimestamp) - return false; - if (nextNodeOffsetWidth != other.nextNodeOffsetWidth) - return false; - if (numValueBytes != other.numValueBytes) - return false; - if (numFamilyBytes != other.numFamilyBytes) - return false; - if (numMvccVersionBytes != other.numMvccVersionBytes) - return false; - if (numMetaBytes != other.numMetaBytes) - return false; - if (numQualifierBytes != other.numQualifierBytes) - return false; - if (numRowBytes != other.numRowBytes) - return false; - if (numTimestampBytes != other.numTimestampBytes) - return false; - if (numUniqueFamilies != other.numUniqueFamilies) - return false; - if (numUniqueQualifiers != other.numUniqueQualifiers) - return false; - if (numUniqueRows != other.numUniqueRows) - return false; - if (numKeyValueBytes != other.numKeyValueBytes) - return false; - if (qualifierOffsetWidth != other.qualifierOffsetWidth) - return false; - if (rowTreeDepth != other.rowTreeDepth) - return false; - if (timestampDeltaWidth != other.timestampDeltaWidth) - return false; - if (timestampIndexWidth != other.timestampIndexWidth) - return false; - if (version != other.version) - return false; - return true; - } - - /** - * Generated by Eclipse - */ - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * 
result + (allSameType ? 1231 : 1237); - result = prime * result + allTypes; - result = prime * result + arrayOffset; - result = prime * result + bufferOffset; - result = prime * result + valueLengthWidth; - result = prime * result + valueOffsetWidth; - result = prime * result + familyOffsetWidth; - result = prime * result + (includesMvccVersion ? 1231 : 1237); - result = prime * result + maxQualifierLength; - result = prime * result + maxRowLength; - result = prime * result + mvccVersionDeltaWidth; - result = prime * result + mvccVersionIndexWidth; - result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32)); - result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32)); - result = prime * result + nextNodeOffsetWidth; - result = prime * result + numValueBytes; - result = prime * result + numFamilyBytes; - result = prime * result + numMvccVersionBytes; - result = prime * result + numMetaBytes; - result = prime * result + numQualifierBytes; - result = prime * result + numRowBytes; - result = prime * result + numTimestampBytes; - result = prime * result + numUniqueFamilies; - result = prime * result + numUniqueQualifiers; - result = prime * result + numUniqueRows; - result = prime * result + numKeyValueBytes; - result = prime * result + qualifierOffsetWidth; - result = prime * result + rowTreeDepth; - result = prime * result + timestampDeltaWidth; - result = prime * result + timestampIndexWidth; - result = prime * result + version; - return result; - } - - /** - * Generated by Eclipse - */ - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - builder.append("PtBlockMeta [arrayOffset="); - builder.append(arrayOffset); - builder.append(", bufferOffset="); - builder.append(bufferOffset); - builder.append(", version="); - builder.append(version); - builder.append(", numMetaBytes="); - builder.append(numMetaBytes); - builder.append(", numKeyValueBytes="); - builder.append(numKeyValueBytes); - builder.append(", includesMvccVersion="); - builder.append(includesMvccVersion); - builder.append(", numRowBytes="); - builder.append(numRowBytes); - builder.append(", numFamilyBytes="); - builder.append(numFamilyBytes); - builder.append(", numQualifierBytes="); - builder.append(numQualifierBytes); - builder.append(", numTimestampBytes="); - builder.append(numTimestampBytes); - builder.append(", numMvccVersionBytes="); - builder.append(numMvccVersionBytes); - builder.append(", numValueBytes="); - builder.append(numValueBytes); - builder.append(", nextNodeOffsetWidth="); - builder.append(nextNodeOffsetWidth); - builder.append(", familyOffsetWidth="); - builder.append(familyOffsetWidth); - builder.append(", qualifierOffsetWidth="); - builder.append(qualifierOffsetWidth); - builder.append(", timestampIndexWidth="); - builder.append(timestampIndexWidth); - builder.append(", mvccVersionIndexWidth="); - builder.append(mvccVersionIndexWidth); - builder.append(", valueOffsetWidth="); - builder.append(valueOffsetWidth); - builder.append(", valueLengthWidth="); - builder.append(valueLengthWidth); - builder.append(", rowTreeDepth="); - builder.append(rowTreeDepth); - builder.append(", maxRowLength="); - builder.append(maxRowLength); - builder.append(", maxQualifierLength="); - builder.append(maxQualifierLength); - builder.append(", minTimestamp="); - builder.append(minTimestamp); - builder.append(", timestampDeltaWidth="); - builder.append(timestampDeltaWidth); - builder.append(", minMvccVersion="); - builder.append(minMvccVersion); - builder.append(", 
mvccVersionDeltaWidth="); - builder.append(mvccVersionDeltaWidth); - builder.append(", allSameType="); - builder.append(allSameType); - builder.append(", allTypes="); - builder.append(allTypes); - builder.append(", numUniqueRows="); - builder.append(numUniqueRows); - builder.append(", numUniqueFamilies="); - builder.append(numUniqueFamilies); - builder.append(", numUniqueQualifiers="); - builder.append(numUniqueQualifiers); - builder.append("]"); - return builder.toString(); - } - - - /************** absolute getters *******************/ - - public int getAbsoluteMetaOffset() { - return arrayOffset + bufferOffset; - } - - public int getAbsoluteRowOffset() { - return getAbsoluteMetaOffset() + numMetaBytes; - } - - public int getAbsoluteFamilyOffset() { - return getAbsoluteRowOffset() + numRowBytes; - } - - public int getAbsoluteQualifierOffset() { - return getAbsoluteFamilyOffset() + numFamilyBytes; - } - - public int getAbsoluteTimestampOffset() { - return getAbsoluteQualifierOffset() + numQualifierBytes; - } - - public int getAbsoluteMvccVersionOffset() { - return getAbsoluteTimestampOffset() + numTimestampBytes; - } - - public int getAbsoluteValueOffset() { - return getAbsoluteMvccVersionOffset() + numMvccVersionBytes; - } - - - /*************** get/set ***************************/ - - public int getTimestampDeltaWidth() { - return timestampDeltaWidth; - } - - public void setTimestampDeltaWidth(int timestampDeltaWidth) { - this.timestampDeltaWidth = timestampDeltaWidth; - } - - public int getValueOffsetWidth() { - return valueOffsetWidth; - } - - public void setValueOffsetWidth(int dataOffsetWidth) { - this.valueOffsetWidth = dataOffsetWidth; - } - - public int getValueLengthWidth() { - return valueLengthWidth; - } - - public void setValueLengthWidth(int dataLengthWidth) { - this.valueLengthWidth = dataLengthWidth; - } - - public int getMaxRowLength() { - return maxRowLength; - } - - public void setMaxRowLength(int maxRowLength) { - this.maxRowLength = maxRowLength; - } - - public long getMinTimestamp() { - return minTimestamp; - } - - public void setMinTimestamp(long minTimestamp) { - this.minTimestamp = minTimestamp; - } - - public byte getAllTypes() { - return allTypes; - } - - public void setAllTypes(byte allTypes) { - this.allTypes = allTypes; - } - - public boolean isAllSameType() { - return allSameType; - } - - public void setAllSameType(boolean allSameType) { - this.allSameType = allSameType; - } - - public int getNextNodeOffsetWidth() { - return nextNodeOffsetWidth; - } - - public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) { - this.nextNodeOffsetWidth = nextNodeOffsetWidth; - } - - public int getNumRowBytes() { - return numRowBytes; - } - - public void setNumRowBytes(int numRowBytes) { - this.numRowBytes = numRowBytes; - } - - public int getNumTimestampBytes() { - return numTimestampBytes; - } - - public void setNumTimestampBytes(int numTimestampBytes) { - this.numTimestampBytes = numTimestampBytes; - } - - public int getNumValueBytes() { - return numValueBytes; - } - - public void setNumValueBytes(int numValueBytes) { - this.numValueBytes = numValueBytes; - } - - public int getNumMetaBytes() { - return numMetaBytes; - } - - public void setNumMetaBytes(int numMetaBytes) { - this.numMetaBytes = numMetaBytes; - } - - public int getArrayOffset() { - return arrayOffset; - } - - public void setArrayOffset(int arrayOffset) { - this.arrayOffset = arrayOffset; - } - - public int getBufferOffset() { - return bufferOffset; - } - - public void setBufferOffset(int bufferOffset) { 
- this.bufferOffset = bufferOffset; - } - - public int getNumKeyValueBytes() { - return numKeyValueBytes; - } - - public void setNumKeyValueBytes(int numKeyValueBytes) { - this.numKeyValueBytes = numKeyValueBytes; - } - - public int getRowTreeDepth() { - return rowTreeDepth; - } - - public void setRowTreeDepth(int rowTreeDepth) { - this.rowTreeDepth = rowTreeDepth; - } - - public int getNumMvccVersionBytes() { - return numMvccVersionBytes; - } - - public void setNumMvccVersionBytes(int numMvccVersionBytes) { - this.numMvccVersionBytes = numMvccVersionBytes; - } - - public int getMvccVersionDeltaWidth() { - return mvccVersionDeltaWidth; - } - - public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) { - this.mvccVersionDeltaWidth = mvccVersionDeltaWidth; - } - - public long getMinMvccVersion() { - return minMvccVersion; - } - - public void setMinMvccVersion(long minMvccVersion) { - this.minMvccVersion = minMvccVersion; - } - - public int getNumFamilyBytes() { - return numFamilyBytes; - } - - public void setNumFamilyBytes(int numFamilyBytes) { - this.numFamilyBytes = numFamilyBytes; - } - - public int getFamilyOffsetWidth() { - return familyOffsetWidth; - } - - public void setFamilyOffsetWidth(int familyOffsetWidth) { - this.familyOffsetWidth = familyOffsetWidth; - } - - public int getNumUniqueRows() { - return numUniqueRows; - } - - public void setNumUniqueRows(int numUniqueRows) { - this.numUniqueRows = numUniqueRows; - } - - public int getNumUniqueFamilies() { - return numUniqueFamilies; - } - - public void setNumUniqueFamilies(int numUniqueFamilies) { - this.numUniqueFamilies = numUniqueFamilies; - } - - public int getNumUniqueQualifiers() { - return numUniqueQualifiers; - } - - public void setNumUniqueQualifiers(int numUniqueQualifiers) { - this.numUniqueQualifiers = numUniqueQualifiers; - } - - public int getNumQualifierBytes() { - return numQualifierBytes; - } - - public void setNumQualifierBytes(int numQualifierBytes) { - this.numQualifierBytes = numQualifierBytes; - } - - public int getQualifierOffsetWidth() { - return qualifierOffsetWidth; - } - - public void setQualifierOffsetWidth(int qualifierOffsetWidth) { - this.qualifierOffsetWidth = qualifierOffsetWidth; - } - - public int getMaxQualifierLength() { - return maxQualifierLength; - } - - public void setMaxQualifierLength(int maxQualifierLength) { - this.maxQualifierLength = maxQualifierLength; - } - - public int getTimestampIndexWidth() { - return timestampIndexWidth; - } - - public void setTimestampIndexWidth(int timestampIndexWidth) { - this.timestampIndexWidth = timestampIndexWidth; - } - - public int getMvccVersionIndexWidth() { - return mvccVersionIndexWidth; - } - - public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) { - this.mvccVersionIndexWidth = mvccVersionIndexWidth; - } - - public int getVersion() { - return version; - } - - public void setVersion(int version) { - this.version = version; - } - - public boolean isIncludesMvccVersion() { - return includesMvccVersion; - } - - public void setIncludesMvccVersion(boolean includesMvccVersion) { - this.includesMvccVersion = includesMvccVersion; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java deleted file mode 100644 index d6a80b2..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Licensed to the Apache Software 
Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree; - -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValue.KeyComparator; -import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator; -import org.apache.hadoop.hbase.KeyValue.RootKeyComparator; -import org.apache.hadoop.hbase.KeyValueUtil; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; -import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; -import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; -import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; -import org.apache.hadoop.hbase.io.hfile.BlockType; -import org.apache.hadoop.hbase.util.ByteBufferUtils; -import org.apache.hadoop.io.RawComparator; -import org.apache.hbase.codec.prefixtree.decode.DecoderFactory; -import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; -import org.apache.hbase.codec.prefixtree.encode.EncoderFactory; -import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; - -/** - * This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or - * package changes. - *
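As a usage illustration (not part of this patch), and assuming the HColumnDescriptor/HTableDescriptor admin API of this branch: once the DataBlockEncoding enum knows about PREFIX_TREE, a column family opts into this codec the same way as any other block encoding. The table and family names below are placeholders.

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;

public class PrefixTreeEncodingExample {
  public static HTableDescriptor exampleTable() {
    HColumnDescriptor family = new HColumnDescriptor("cf");
    // the enum constant instantiates PrefixTreeCodec reflectively, as noted above
    family.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE);
    HTableDescriptor table = new HTableDescriptor("prefix_tree_example");
    table.addFamily(family);
    return table;
  }
}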

    - * PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point - * for PrefixTree encoding and decoding. Encoding is delegated to instances of - * {@link PrefixTreeEncoder}, and decoding is delegated to instances of - * {@link org.apache.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder instances are - * created and recycled by static PtEncoderFactory and PtDecoderFactory. - */ -@InterfaceAudience.Private -public class PrefixTreeCodec implements DataBlockEncoder{ - - /** - * no-arg constructor for reflection - */ - public PrefixTreeCodec() { - } - - /** - * Copied from BufferedDataBlockEncoder. Almost definitely can be improved, but i'm not familiar - * enough with the concept of the HFileBlockEncodingContext. - */ - @Override - public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion, - HFileBlockEncodingContext blkEncodingCtx) throws IOException { - if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) { - throw new IOException(this.getClass().getName() + " only accepts " - + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context."); - } - - HFileBlockDefaultEncodingContext encodingCtx - = (HFileBlockDefaultEncodingContext) blkEncodingCtx; - encodingCtx.prepareEncoding(); - DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder(); - internalEncodeKeyValues(dataOut, in, includesMvccVersion); - - //do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE? - if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) { - encodingCtx.postEncoding(BlockType.ENCODED_DATA); - } else { - encodingCtx.postEncoding(BlockType.DATA); - } - } - - private void internalEncodeKeyValues(DataOutputStream encodedOutputStream, - ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException { - rawKeyValues.rewind(); - PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion); - - try{ - KeyValue kv; - while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) { - builder.write(kv); - } - builder.flush(); - }finally{ - EncoderFactory.checkIn(builder); - } - } - - - @Override - public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion) - throws IOException { - return decodeKeyValues(source, 0, 0, includesMvccVersion); - } - - - /** - * I don't think this method is called during normal HBase operation, so efficiency is not - * important. - */ - @Override - public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength, - int skipLastBytes, boolean includesMvccVersion) throws IOException { - ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste - sourceAsBuffer.mark(); - PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer); - sourceAsBuffer.rewind(); - int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes(); - byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader]; - ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader); - result.rewind(); - CellSearcher searcher = null; - try { - searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion); - while (searcher.advance()) { - KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current()); - // needs to be modified for DirectByteBuffers. 
no existing methods to - // write VLongs to byte[] - int offset = result.arrayOffset() + result.position(); - KeyValueUtil.appendToByteArray(currentCell, result.array(), offset); - int keyValueLength = KeyValueUtil.length(currentCell); - ByteBufferUtils.skip(result, keyValueLength); - offset += keyValueLength; - if (includesMvccVersion) { - ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion()); - } - } - result.position(result.limit());//make it appear as if we were appending - return result; - } finally { - DecoderFactory.checkIn(searcher); - } - } - - - @Override - public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { - block.rewind(); - PrefixTreeArraySearcher searcher = null; - try { - //should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will - searcher = DecoderFactory.checkOut(block, true); - if (!searcher.positionAtFirstCell()) { - return null; - } - return KeyValueUtil.copyKeyToNewByteBuffer(searcher.current()); - } finally { - DecoderFactory.checkIn(searcher); - } - } - - @Override - public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm, - DataBlockEncoding encoding, byte[] header) { - if(DataBlockEncoding.PREFIX_TREE != encoding){ - //i'm not sure why encoding is in the interface. Each encoder implementation should probably - //know it's encoding type - throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported"); - } - return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header); - } - - @Override - public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) { - return new HFileBlockDefaultDecodingContext(compressionAlgorithm); - } - - /** - * Is this the correct handling of an illegal comparator? How to prevent that from getting all - * the way to this point. - */ - @Override - public EncodedSeeker createSeeker(RawComparator comparator, boolean includesMvccVersion) { - if(! (comparator instanceof KeyComparator)){ - throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator"); - } - if(comparator instanceof MetaKeyComparator){ - throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META " - +"table"); - } - if(comparator instanceof RootKeyComparator){ - throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT " - +"table"); - } - - return new PrefixTreeSeeker(includesMvccVersion); - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java deleted file mode 100644 index 85c6484..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker; -import org.apache.hbase.codec.prefixtree.decode.DecoderFactory; -import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; - -/** - * These methods have the same definition as any implementation of the EncodedSeeker. - * - * In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It - * currently returns a new KeyValue object each time getKeyValue is called. This is not horrible, - * but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in - * the data from the PrefixTreeCell. It is somewhat heavyweight right now. - */ -@InterfaceAudience.Private -public class PrefixTreeSeeker implements EncodedSeeker { - - protected ByteBuffer block; - protected boolean includeMvccVersion; - protected PrefixTreeArraySearcher ptSearcher; - - public PrefixTreeSeeker(boolean includeMvccVersion) { - this.includeMvccVersion = includeMvccVersion; - } - - @Override - public void setCurrentBuffer(ByteBuffer fullBlockBuffer) { - block = fullBlockBuffer; - ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion); - rewind(); - } - - /** - * Currently unused. - *

    - * TODO performance leak. should reuse the searchers. hbase does not currently have a hook where - * this can be called - */ - public void releaseCurrentSearcher(){ - DecoderFactory.checkIn(ptSearcher); - } - - - @Override - public ByteBuffer getKeyDeepCopy() { - return KeyValueUtil.copyKeyToNewByteBuffer(ptSearcher.current()); - } - - - @Override - public ByteBuffer getValueShallowCopy() { - return CellUtil.getValueBufferShallowCopy(ptSearcher.current()); - } - - /** - * currently must do deep copy into new array - */ - @Override - public ByteBuffer getKeyValueBuffer() { - return KeyValueUtil.copyToNewByteBuffer(ptSearcher.current()); - } - - /** - * currently must do deep copy into new array - */ - @Override - public KeyValue getKeyValue() { - return KeyValueUtil.copyToNewKeyValue(ptSearcher.current()); - } - - /** - * Currently unused. - *

    - * A nice, lightweight reference, though the underlying cell is transient. This method may return - * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may - * return a different reference for each Cell. - *
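A small sketch of the distinction being described here, assuming a PrefixTreeSeeker whose setCurrentBuffer(...) has already been called and assuming the class keeps its name under the relocated org.apache.hadoop.hbase package this patch moves it to; the helper name is illustrative only.

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeSeeker;

public class SeekerCellReuseExample {
  static void illustrate(PrefixTreeSeeker seeker) {
    Cell transientCell = seeker.get();           // reused backing PrefixTreeCell, no copy
    KeyValue stableCopy = seeker.getKeyValue();  // deep copy into a fresh byte[]
    seeker.next();
    // transientCell may now describe the seeker's new position; stableCopy is unchanged
  }
}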

    - * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to - * use this method instead of the getKeyValue() methods above. - */ - public Cell get() { - return ptSearcher.current(); - } - - @Override - public void rewind() { - ptSearcher.positionAtFirstCell(); - } - - @Override - public boolean next() { - return ptSearcher.advance(); - } - -// @Override - public boolean advance() { - return ptSearcher.advance(); - } - - - private static final boolean USE_POSITION_BEFORE = false; - - /** - * Seek forward only (should be called reseekToKeyInBlock?). - *

    - * If the exact key is found, look at the seekBefore variable and:
    - * - if true: go to the previous key
    - * - if false: stay on the exact key - *

    - * If the exact key is not found, then go to the previous key *if possible*, but remember to - * leave the scanner in a valid state if possible. - *
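To make the return contract concrete, a hedged sketch (EncodedSeeker is the interface this class implements; the helper name is illustrative only):

import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;

public class SeekContractExample {
  // 0 means the seeker sits exactly on the requested key; 1 means anything else,
  // including having backed up because forceBeforeOnExactMatch was true.
  static boolean seeksToExactKey(EncodedSeeker seeker, byte[] keyOnlyBytes) {
    return seeker.seekToKeyInBlock(keyOnlyBytes, 0, keyOnlyBytes.length, false) == 0;
  }
}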

    - * @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker - * @param offset offset into the keyOnlyBytes array - * @param length number of bytes of the keyOnlyBytes array to use - * @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up 1 Cell - * @return 0 if the seeker is on the exact key
    - * 1 if the seeker is not on the key for any reason, including seekBefore being true - */ - @Override - public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length, - boolean forceBeforeOnExactMatch) { - if (USE_POSITION_BEFORE) { - return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length, - forceBeforeOnExactMatch); - }else{ - return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length, - forceBeforeOnExactMatch); - } - } - - - - /* - * Support both of these options since the underlying PrefixTree supports both. Possibly - * expand the EncodedSeeker to utilize them both. - */ - - protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length, - boolean forceBeforeOnExactMatch){ - // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell - KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); - - CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv); - - if(CellScannerPosition.AT == position){ - if (forceBeforeOnExactMatch) { - ptSearcher.previous(); - return 1; - } - return 0; - } - - return 1; - } - - - protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length, - boolean forceBeforeOnExactMatch){ - // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell - KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); - - //should probably switch this to use the seekForwardToOrBefore method - CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv); - - if(CellScannerPosition.AT == position){ - if (forceBeforeOnExactMatch) { - ptSearcher.previous(); - return 1; - } - return 0; - - } - - if(CellScannerPosition.AFTER == position){ - if(!ptSearcher.isBeforeFirst()){ - ptSearcher.previous(); - } - return 1; - } - - if(position == CellScannerPosition.AFTER_LAST){ - return 1; - } - - throw new RuntimeException("unexpected CellScannerPosition:"+position); - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java deleted file mode 100644 index 8f49ec1..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode; - -import java.nio.ByteBuffer; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingQueue; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Pools PrefixTreeArraySearcher objects. 
Each Searcher can consist of hundreds or thousands of - objects, and one is needed for each HFile during a Get operation. With tens of thousands of - Gets/second, reusing these searchers may save a lot of young gen collections. - *
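A sketch of the check-out/check-in discipline the pool depends on, mirroring the try/finally pattern PrefixTreeCodec uses; it assumes the DecoderFactory wrapper shown further down in this patch, under the relocated org.apache.hadoop.hbase package, and an on-heap ByteBuffer holding an encoded block.

import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;

public class SearcherPoolUsageExample {
  static int countCells(ByteBuffer encodedBlock, boolean includesMvccVersion) {
    PrefixTreeArraySearcher searcher = null;
    int cellCount = 0;
    try {
      searcher = DecoderFactory.checkOut(encodedBlock, includesMvccVersion);
      while (searcher.advance()) {
        ++cellCount;
      }
    } finally {
      DecoderFactory.checkIn(searcher); // return the searcher to the pool for reuse
    }
    return cellCount;
  }
}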

    - * Alternative implementation would be a ByteBufferSearcherPool (not implemented yet). - */ -@InterfaceAudience.Private -public class ArraySearcherPool { - - /** - * One decoder is needed for each storefile for each Get operation so we may need hundreds at the - * same time, however, decoding is a CPU bound activity so should limit this to something in the - * realm of maximum reasonable active threads. - */ - private static final Integer MAX_POOL_SIZE = 1000; - - protected Queue pool - = new LinkedBlockingQueue(MAX_POOL_SIZE); - - public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) { - PrefixTreeArraySearcher searcher = pool.poll();//will return null if pool is empty - searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion); - return searcher; - } - - public void checkIn(PrefixTreeArraySearcher searcher) { - searcher.releaseBlockReference(); - pool.offer(searcher); - } - - @Override - public String toString() { - return ("poolSize:" + pool.size()); - } - -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java deleted file mode 100644 index 7d98fde..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode; - -import java.nio.ByteBuffer; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; - -/** - * Static wrapper class for the ArraySearcherPool. - */ -@InterfaceAudience.Private -public class DecoderFactory { - - private static final ArraySearcherPool POOL = new ArraySearcherPool(); - - //TODO will need a PrefixTreeSearcher on top of CellSearcher - public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer, - boolean includeMvccVersion) { - if (buffer.isDirect()) { - throw new IllegalArgumentException("DirectByteBuffers not supported yet"); - // TODO implement PtByteBufferBlockScanner - } - - PrefixTreeArraySearcher searcher = POOL.checkOut(buffer, - includeMvccVersion); - return searcher; - } - - public static void checkIn(CellSearcher pSearcher) { - if (pSearcher == null) { - return; - } - if (! 
(pSearcher instanceof PrefixTreeArraySearcher)) { - throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to " - +DecoderFactory.class); - } - PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher; - POOL.checkIn(searcher); - } - - - /**************************** helper ******************************/ - - public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer, - PrefixTreeArraySearcher searcher, boolean includeMvccVersion) { - if (searcher == null) { - PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer); - searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(), - blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength()); - searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); - return searcher; - } - - PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta(); - blockMeta.initOnBlock(buffer); - if (!searcher.areBuffersBigEnough()) { - int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(), - searcher.getMaxRowTreeStackNodes()); - int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength()); - int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(), - searcher.getQualifierBufferLength()); - searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength, - qualifierBufferLength); - } - //this is where we parse the BlockMeta - searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); - return searcher; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java deleted file mode 100644 index 1ce90e6..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.scanner.ReversibleCellScanner; - -/** - * Methods for going backwards through a PrefixTree block. This class is split out on its own to - * simplify the Scanner superclass and Searcher subclass. 
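For example, stepping back one row with the methods this class defines might look like the sketch below; it assumes the class keeps its name under the relocated org.apache.hadoop.hbase package, and the helper name is illustrative only.

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArrayReversibleScanner;

public class ReverseRowStepExample {
  // endOfRow=true lands on the previous row's last cell; endOfRow=false would land on its first.
  static Cell lastCellOfPreviousRow(PrefixTreeArrayReversibleScanner scanner) {
    if (scanner.previousRow(true)) {
      return scanner.current();
    }
    return null; // the scanner was already on (or before) the first row of the block
  }
}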
- */ -@InterfaceAudience.Private -public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements - ReversibleCellScanner { - - /***************** construct ******************************/ - - public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, - int rowBufferLength, int qualifierBufferLength) { - super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); - } - - - /***************** Object methods ***************************/ - - @Override - public boolean equals(Object obj) { - //trivial override to confirm intent (findbugs) - return super.equals(obj); - } - - - /***************** methods **********************************/ - - @Override - public boolean previous() { - if (afterLast) { - afterLast = false; - positionAtLastCell(); - return true; - } - if (beforeFirst) { - return false; - } - if (isFirstCellInRow()) { - previousRowInternal(); - if (beforeFirst) { - return false; - } - populateLastNonRowFields(); - return true; - } - populatePreviousNonRowFields(); - return true; - } - - @Override - public boolean previousRow(boolean endOfRow) { - previousRowInternal(); - if(beforeFirst){ - return false; - } - if(endOfRow){ - populateLastNonRowFields(); - }else{ - populateFirstNonRowFields(); - } - return true; - } - - private boolean previousRowInternal() { - if (beforeFirst) { - return false; - } - if (afterLast) { - positionAtLastRow(); - return true; - } - if (currentRowNode.hasOccurrences()) { - discardCurrentRowNode(false); - if(currentRowNode==null){ - return false; - } - } - while (!beforeFirst) { - if (isDirectlyAfterNub()) {//we are about to back up to the nub - currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf - nubCellsRemain = true;//this positions us on the nub - return true; - } - if (currentRowNode.hasPreviousFanNodes()) { - followPreviousFan(); - descendToLastRowFromCurrentPosition(); - } else {// keep going up the stack until we find previous fan positions - discardCurrentRowNode(false); - if(currentRowNode==null){ - return false; - } - } - if (currentRowNode.hasOccurrences()) {// escape clause - return true;// found some values - } - } - return false;// went past the beginning - } - - protected boolean isDirectlyAfterNub() { - return currentRowNode.isNub() && currentRowNode.getFanIndex()==0; - } - - protected void positionAtLastRow() { - reInitFirstNode(); - descendToLastRowFromCurrentPosition(); - } - - protected void descendToLastRowFromCurrentPosition() { - while (currentRowNode.hasChildren()) { - followLastFan(); - } - } - - protected void positionAtLastCell() { - positionAtLastRow(); - populateLastNonRowFields(); - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java deleted file mode 100644 index 398bd5d..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellScanner; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader; -import org.apache.hbase.codec.prefixtree.decode.row.RowNodeReader; -import org.apache.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder; -import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; - -/** - * Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and - * call get/set methods. - * - * This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This - * implementation requires that the bytes be in a normal java byte[] for performance. The - * alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer - * without copying the whole buffer on-heap. - */ -@InterfaceAudience.Private -public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner { - - /***************** fields ********************************/ - - protected PrefixTreeBlockMeta blockMeta; - - protected boolean beforeFirst; - protected boolean afterLast; - - protected RowNodeReader[] rowNodes; - protected int rowNodeStackIndex; - - protected RowNodeReader currentRowNode; - protected ColumnReader familyReader; - protected ColumnReader qualifierReader; - protected TimestampDecoder timestampDecoder; - protected MvccVersionDecoder mvccVersionDecoder; - - protected boolean nubCellsRemain; - protected int currentCellIndex; - - - /*********************** construct ******************************/ - - // pass in blockMeta so we can initialize buffers big enough for all cells in the block - public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, - int rowBufferLength, int qualifierBufferLength) { - this.rowNodes = new RowNodeReader[rowTreeDepth]; - for (int i = 0; i < rowNodes.length; ++i) { - rowNodes[i] = new RowNodeReader(); - } - this.rowBuffer = new byte[rowBufferLength]; - this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH]; - this.familyReader = new ColumnReader(familyBuffer, true); - this.qualifierBuffer = new byte[qualifierBufferLength]; - this.qualifierReader = new ColumnReader(qualifierBuffer, false); - this.timestampDecoder = new TimestampDecoder(); - this.mvccVersionDecoder = new MvccVersionDecoder(); - } - - - /**************** init helpers ***************************************/ - - /** - * Call when first accessing a block. 
- * @return entirely new scanner if false - */ - public boolean areBuffersBigEnough() { - if (rowNodes.length < blockMeta.getRowTreeDepth()) { - return false; - } - if (rowBuffer.length < blockMeta.getMaxRowLength()) { - return false; - } - if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) { - return false; - } - return true; - } - - public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, - boolean includeMvccVersion) { - this.block = block; - this.blockMeta = blockMeta; - this.familyOffset = familyBuffer.length; - this.familyReader.initOnBlock(blockMeta, block); - this.qualifierOffset = qualifierBuffer.length; - this.qualifierReader.initOnBlock(blockMeta, block); - this.timestampDecoder.initOnBlock(blockMeta, block); - this.mvccVersionDecoder.initOnBlock(blockMeta, block); - this.includeMvccVersion = includeMvccVersion; - resetToBeforeFirstEntry(); - } - - // Does this have to be in the CellScanner Interface? TODO - public void resetToBeforeFirstEntry() { - beforeFirst = true; - afterLast = false; - rowNodeStackIndex = -1; - currentRowNode = null; - rowLength = 0; - familyOffset = familyBuffer.length; - familyLength = 0; - qualifierOffset = blockMeta.getMaxQualifierLength(); - qualifierLength = 0; - nubCellsRemain = false; - currentCellIndex = -1; - timestamp = -1L; - type = DEFAULT_TYPE; - absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized - valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length - } - - /** - * Call this before putting the scanner back into a pool so it doesn't hold the last used block - * in memory. - */ - public void releaseBlockReference(){ - block = null; - } - - - /********************** CellScanner **********************/ - - @Override - public Cell current() { - if(isOutOfBounds()){ - return null; - } - return (Cell)this; - } - - /******************* Object methods ************************/ - - @Override - public boolean equals(Object obj) { - //trivial override to confirm intent (findbugs) - return super.equals(obj); - } - - @Override - public int hashCode() { - return super.hashCode(); - } - - /** - * Override PrefixTreeCell.toString() with a check to see if the current cell is valid. 
- */ - @Override - public String toString() { - Cell currentCell = current(); - if(currentCell==null){ - return "null"; - } - return ((PrefixTreeCell)currentCell).getKeyValueString(); - } - - - /******************* advance ***************************/ - - public boolean positionAtFirstCell() { - reInitFirstNode(); - return advance(); - } - - @Override - public boolean advance() { - if (afterLast) { - return false; - } - if (!hasOccurrences()) { - resetToBeforeFirstEntry(); - } - if (beforeFirst || isLastCellInRow()) { - nextRow(); - if (afterLast) { - return false; - } - } else { - ++currentCellIndex; - } - - populateNonRowFields(currentCellIndex); - return true; - } - - - public boolean nextRow() { - nextRowInternal(); - if (afterLast) { - return false; - } - populateNonRowFields(currentCellIndex); - return true; - } - - - /** - * This method is safe to call when the scanner is not on a fully valid row node, as in the case - * of a row token miss in the Searcher - * @return true if we are positioned on a valid row, false if past end of block - */ - protected boolean nextRowInternal() { - if (afterLast) { - return false; - } - if (beforeFirst) { - initFirstNode(); - if (currentRowNode.hasOccurrences()) { - if (currentRowNode.isNub()) { - nubCellsRemain = true; - } - currentCellIndex = 0; - return true; - } - } - if (currentRowNode.isLeaf()) { - discardCurrentRowNode(true); - } - while (!afterLast) { - if (nubCellsRemain) { - nubCellsRemain = false; - } - if (currentRowNode.hasMoreFanNodes()) { - followNextFan(); - if (currentRowNode.hasOccurrences()) { - currentCellIndex = 0; - return true; - }// found some values - } else { - discardCurrentRowNode(true); - } - } - return false;// went past the end - } - - - /**************** secondary traversal methods ******************************/ - - protected void reInitFirstNode() { - resetToBeforeFirstEntry(); - initFirstNode(); - } - - protected void initFirstNode() { - int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset(); - rowNodeStackIndex = 0; - currentRowNode = rowNodes[0]; - currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure); - appendCurrentTokenToRowBuffer(); - beforeFirst = false; - } - - protected void followFirstFan() { - followFan(0); - } - - protected void followPreviousFan() { - int nextFanPosition = currentRowNode.getFanIndex() - 1; - followFan(nextFanPosition); - } - - protected void followCurrentFan() { - int currentFanPosition = currentRowNode.getFanIndex(); - followFan(currentFanPosition); - } - - protected void followNextFan() { - int nextFanPosition = currentRowNode.getFanIndex() + 1; - followFan(nextFanPosition); - } - - protected void followLastFan() { - followFan(currentRowNode.getLastFanIndex()); - } - - protected void followFan(int fanIndex) { - currentRowNode.setFanIndex(fanIndex); - appendToRowBuffer(currentRowNode.getFanByte(fanIndex)); - - int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset() - + currentRowNode.getNextNodeOffset(fanIndex, blockMeta); - ++rowNodeStackIndex; - - currentRowNode = rowNodes[rowNodeStackIndex]; - currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure); - - //TODO getToken is spewing garbage - appendCurrentTokenToRowBuffer(); - if (currentRowNode.isNub()) { - nubCellsRemain = true; - } - currentCellIndex = 0; - } - - /** - * @param forwards which marker to set if we overflow - */ - protected void discardCurrentRowNode(boolean forwards) { - RowNodeReader rowNodeBeingPopped = currentRowNode; - --rowNodeStackIndex;// 
pop it off the stack - if (rowNodeStackIndex < 0) { - currentRowNode = null; - if (forwards) { - markAfterLast(); - } else { - markBeforeFirst(); - } - return; - } - popFromRowBuffer(rowNodeBeingPopped); - currentRowNode = rowNodes[rowNodeStackIndex]; - } - - protected void markBeforeFirst() { - beforeFirst = true; - afterLast = false; - currentRowNode = null; - } - - protected void markAfterLast() { - beforeFirst = false; - afterLast = true; - currentRowNode = null; - } - - - /***************** helper methods **************************/ - - protected void appendCurrentTokenToRowBuffer() { - System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength, - currentRowNode.getTokenLength()); - rowLength += currentRowNode.getTokenLength(); - } - - protected void appendToRowBuffer(byte b) { - rowBuffer[rowLength] = b; - ++rowLength; - } - - protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) { - rowLength -= rowNodeBeingPopped.getTokenLength(); - --rowLength; // pop the parent's fan byte - } - - protected boolean hasOccurrences() { - return currentRowNode != null && currentRowNode.hasOccurrences(); - } - - protected boolean isBranch() { - return currentRowNode != null && !currentRowNode.hasOccurrences() - && currentRowNode.hasChildren(); - } - - protected boolean isNub() { - return currentRowNode != null && currentRowNode.hasOccurrences() - && currentRowNode.hasChildren(); - } - - protected boolean isLeaf() { - return currentRowNode != null && currentRowNode.hasOccurrences() - && !currentRowNode.hasChildren(); - } - - //TODO expose this in a PrefixTreeScanner interface - public boolean isBeforeFirst(){ - return beforeFirst; - } - - public boolean isAfterLast(){ - return afterLast; - } - - protected boolean isOutOfBounds(){ - return beforeFirst || afterLast; - } - - protected boolean isFirstCellInRow() { - return currentCellIndex == 0; - } - - protected boolean isLastCellInRow() { - return currentCellIndex == currentRowNode.getLastCellIndex(); - } - - - /********************* fill in family/qualifier/ts/type/value ************/ - - protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) { - populateNonRowFields(cellNum); - return CellComparator.compareStatic(this, key); - } - - protected void populateFirstNonRowFields() { - populateNonRowFields(0); - } - - protected void populatePreviousNonRowFields() { - populateNonRowFields(currentCellIndex - 1); - } - - protected void populateLastNonRowFields() { - populateNonRowFields(currentRowNode.getLastCellIndex()); - } - - protected void populateNonRowFields(int cellIndex) { - currentCellIndex = cellIndex; - populateFamily(); - populateQualifier(); - populateTimestamp(); - populateMvccVersion(); - populateType(); - populateValueOffsets(); - } - - protected void populateFamily() { - int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta); - familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset(); - familyLength = familyReader.getColumnLength(); - } - - protected void populateQualifier() { - int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta); - qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset(); - qualifierLength = qualifierReader.getColumnLength(); - } - - protected void populateTimestamp() { - if (blockMeta.isAllSameTimestamp()) { - timestamp = blockMeta.getMinTimestamp(); - } else { - int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta); - timestamp = 
timestampDecoder.getLong(timestampIndex); - } - } - - protected void populateMvccVersion() { - if (blockMeta.isAllSameMvccVersion()) { - mvccVersion = blockMeta.getMinMvccVersion(); - } else { - int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex, - blockMeta); - mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex); - } - } - - protected void populateType() { - int typeInt; - if (blockMeta.isAllSameType()) { - typeInt = blockMeta.getAllTypes(); - } else { - typeInt = currentRowNode.getType(currentCellIndex, blockMeta); - } - type = PrefixTreeCell.TYPES[typeInt]; - } - - protected void populateValueOffsets() { - int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta); - absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection; - valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta); - } - - - /**************** getters ***************************/ - - public byte[] getTreeBytes() { - return block; - } - - public PrefixTreeBlockMeta getBlockMeta() { - return blockMeta; - } - - public int getMaxRowTreeStackNodes() { - return rowNodes.length; - } - - public int getRowBufferLength() { - return rowBuffer.length; - } - - public int getQualifierBufferLength() { - return qualifierBuffer.length; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java deleted file mode 100644 index 5201b6d..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java +++ /dev/null @@ -1,405 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; - -import com.google.common.primitives.UnsignedBytes; - -/** - * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to - * position itself on a requested Cell without scanning through cells before it. The PrefixTree is - * set up to be a Trie of rows, so finding a particular row is extremely cheap. - *

    - * Once it finds the row, it does a binary search through the cells inside the row, which is not as - * fast as the trie search, but faster than iterating through every cell like existing block - * formats - * do. For this reason, this implementation is targeted towards schemas where rows are narrow - * enough - * to have several or many per block, and where you are generally looking for the entire row or - * the - * first cell. It will still be fast for wide rows or point queries, but could be improved upon. - */ -@InterfaceAudience.Private -public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements - CellSearcher { - - /*************** construct ******************************/ - - public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, - int rowBufferLength, int qualifierBufferLength) { - super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); - } - - - /********************* CellSearcher methods *******************/ - - @Override - public boolean positionAt(Cell key) { - return CellScannerPosition.AT == positionAtOrAfter(key); - } - - @Override - public CellScannerPosition positionAtOrBefore(Cell key) { - reInitFirstNode(); - int fanIndex = -1; - - while(true){ - //detect row mismatch. break loop if mismatch - int currentNodeDepth = rowLength; - int rowTokenComparison = compareToCurrentToken(key); - if(rowTokenComparison != 0){ - return fixRowTokenMissReverse(rowTokenComparison); - } - - //exact row found, move on to qualifier & ts - if(rowMatchesAfterCurrentPosition(key)){ - return positionAtQualifierTimestamp(key, true); - } - - //detect dead end (no fan to descend into) - if(!currentRowNode.hasFan()){ - if(hasOccurrences()){//must be leaf or nub - populateLastNonRowFields(); - return CellScannerPosition.BEFORE; - }else{ - //TODO i don't think this case is exercised by any tests - return fixRowFanMissReverse(0); - } - } - - //keep hunting for the rest of the row - byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth); - fanIndex = currentRowNode.whichFanNode(searchForByte); - if(fanIndex < 0){//no matching row. return early - int insertionPoint = -fanIndex; - return fixRowFanMissReverse(insertionPoint); - } - //found a match, so dig deeper into the tree - followFan(fanIndex); - } - } - - /** - * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra - * if-statements. Priority on readability and debugability. - */ - @Override - public CellScannerPosition positionAtOrAfter(Cell key) { - reInitFirstNode(); - int fanIndex = -1; - - while(true){ - //detect row mismatch. break loop if mismatch - int currentNodeDepth = rowLength; - int rowTokenComparison = compareToCurrentToken(key); - if(rowTokenComparison != 0){ - return fixRowTokenMissForward(rowTokenComparison); - } - - //exact row found, move on to qualifier & ts - if(rowMatchesAfterCurrentPosition(key)){ - return positionAtQualifierTimestamp(key, false); - } - - //detect dead end (no fan to descend into) - if(!currentRowNode.hasFan()){ - if(hasOccurrences()){ - populateFirstNonRowFields(); - return CellScannerPosition.AFTER; - }else{ - //TODO i don't think this case is exercised by any tests - return fixRowFanMissForward(0); - } - } - - //keep hunting for the rest of the row - byte searchForByte = CellUtil.getRowByte(key, currentNodeDepth); - fanIndex = currentRowNode.whichFanNode(searchForByte); - if(fanIndex < 0){//no matching row. 
return early - int insertionPoint = -fanIndex; - return fixRowFanMissForward(insertionPoint); - } - //found a match, so dig deeper into the tree - followFan(fanIndex); - } - } - - @Override - public boolean seekForwardTo(Cell key) { - if(currentPositionIsAfter(key)){ - //our position is after the requested key, so can't do anything - return false; - } - return positionAt(key); - } - - @Override - public CellScannerPosition seekForwardToOrBefore(Cell key) { - //Do we even need this check or should upper layers avoid this situation. It's relatively - //expensive compared to the rest of the seek operation. - if(currentPositionIsAfter(key)){ - //our position is after the requested key, so can't do anything - return CellScannerPosition.AFTER; - } - - return positionAtOrBefore(key); - } - - @Override - public CellScannerPosition seekForwardToOrAfter(Cell key) { - //Do we even need this check or should upper layers avoid this situation. It's relatively - //expensive compared to the rest of the seek operation. - if(currentPositionIsAfter(key)){ - //our position is after the requested key, so can't do anything - return CellScannerPosition.AFTER; - } - - return positionAtOrAfter(key); - } - - /** - * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false - */ - @Override - public void positionAfterLastCell() { - resetToBeforeFirstEntry(); - beforeFirst = false; - afterLast = true; - } - - - /***************** Object methods ***************************/ - - @Override - public boolean equals(Object obj) { - //trivial override to confirm intent (findbugs) - return super.equals(obj); - } - - - /****************** internal methods ************************/ - - protected boolean currentPositionIsAfter(Cell cell){ - return compareTo(cell) > 0; - } - - protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) { - int minIndex = 0; - int maxIndex = currentRowNode.getLastCellIndex(); - int diff; - while (true) { - int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow - diff = populateNonRowFieldsAndCompareTo(midIndex, key); - - if (diff == 0) {// found exact match - return CellScannerPosition.AT; - } else if (minIndex == maxIndex) {// even termination case - break; - } else if ((minIndex + 1) == maxIndex) {// odd termination case - diff = populateNonRowFieldsAndCompareTo(maxIndex, key); - if(diff > 0){ - diff = populateNonRowFieldsAndCompareTo(minIndex, key); - } - break; - } else if (diff < 0) {// keep going forward - minIndex = currentCellIndex; - } else {// went past it, back up - maxIndex = currentCellIndex; - } - } - - if (diff == 0) { - return CellScannerPosition.AT; - - } else if (diff < 0) {// we are before key - if (beforeOnMiss) { - return CellScannerPosition.BEFORE; - } - if (advance()) { - return CellScannerPosition.AFTER; - } - return CellScannerPosition.AFTER_LAST; - - } else {// we are after key - if (!beforeOnMiss) { - return CellScannerPosition.AFTER; - } - if (previous()) { - return CellScannerPosition.BEFORE; - } - return CellScannerPosition.BEFORE_FIRST; - } - } - - /** - * compare this.row to key.row but starting at the current rowLength - * @param key Cell being searched for - * @return true if row buffer contents match key.row - */ - protected boolean rowMatchesAfterCurrentPosition(Cell key) { - if (!currentRowNode.hasOccurrences()) { - return false; - } - int thatRowLength = key.getRowLength(); - if (rowLength != thatRowLength) { - return false; - } - return true; - } - - // TODO move part of this 
to Cell comparator? - /** - * Compare only the bytes within the window of the current token - * @param key - * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after - */ - protected int compareToCurrentToken(Cell key) { - int startIndex = rowLength - currentRowNode.getTokenLength(); - int endIndexExclusive = startIndex + currentRowNode.getTokenLength(); - for (int i = startIndex; i < endIndexExclusive; ++i) { - if (i >= key.getRowLength()) {// key was shorter, so it's first - return -1; - } - byte keyByte = CellUtil.getRowByte(key, i); - byte thisByte = rowBuffer[i]; - if (keyByte == thisByte) { - continue; - } - return UnsignedBytes.compare(keyByte, thisByte); - } - return 0; - } - - protected void followLastFansUntilExhausted(){ - while(currentRowNode.hasFan()){ - followLastFan(); - } - } - - - /****************** complete seek when token mismatch ******************/ - - /** - * @param searcherIsAfterInputKey <0: input key is before the searcher's position
    - * >0: input key is after the searcher's position - */ - protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) { - if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up - boolean foundPreviousRow = previousRow(true); - if(foundPreviousRow){ - populateLastNonRowFields(); - return CellScannerPosition.BEFORE; - }else{ - return CellScannerPosition.BEFORE_FIRST; - } - - }else{//searcher position is before the input key - if(currentRowNode.hasOccurrences()){ - populateFirstNonRowFields(); - return CellScannerPosition.BEFORE; - } - boolean foundNextRow = nextRow(); - if(foundNextRow){ - return CellScannerPosition.AFTER; - }else{ - return CellScannerPosition.AFTER_LAST; - } - } - } - - /** - * @param searcherIsAfterInputKey <0: input key is before the searcher's position
    - * >0: input key is after the searcher's position - */ - protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) { - if (searcherIsAfterInputKey < 0) {//searcher position is after the input key - if(currentRowNode.hasOccurrences()){ - populateFirstNonRowFields(); - return CellScannerPosition.AFTER; - } - boolean foundNextRow = nextRow(); - if(foundNextRow){ - return CellScannerPosition.AFTER; - }else{ - return CellScannerPosition.AFTER_LAST; - } - - }else{//searcher position is before the input key, so go forward - discardCurrentRowNode(true); - boolean foundNextRow = nextRow(); - if(foundNextRow){ - return CellScannerPosition.AFTER; - }else{ - return CellScannerPosition.AFTER_LAST; - } - } - } - - - /****************** complete seek when fan mismatch ******************/ - - protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){ - if(fanInsertionPoint == 0){//we need to back up a row - boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row - if(foundPreviousRow){ - populateLastNonRowFields(); - return CellScannerPosition.BEFORE; - } - return CellScannerPosition.BEFORE_FIRST; - } - - //follow the previous fan, but then descend recursively forward - followFan(fanInsertionPoint - 1); - followLastFansUntilExhausted(); - populateLastNonRowFields(); - return CellScannerPosition.BEFORE; - } - - protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){ - if(fanInsertionPoint >= currentRowNode.getFanOut()){ - discardCurrentRowNode(true); - if (!nextRow()) { - return CellScannerPosition.AFTER_LAST; - } else { - return CellScannerPosition.AFTER; - } - } - - followFan(fanInsertionPoint); - if(hasOccurrences()){ - populateFirstNonRowFields(); - return CellScannerPosition.AFTER; - } - - if(nextRowInternal()){ - populateFirstNonRowFields(); - return CellScannerPosition.AFTER; - - }else{ - return CellScannerPosition.AFTER_LAST; - } - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java deleted file mode 100644 index 5573c02..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.decode; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; - -/** - * As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields - * of this class so that Cell logic can be applied, but without allocating new memory for every Cell - * iterated through. - */ -@InterfaceAudience.Private -public class PrefixTreeCell implements Cell, Comparable { - - /********************** static **********************/ - - public static final KeyValue.Type[] TYPES = new KeyValue.Type[256]; - static { - for (KeyValue.Type type : KeyValue.Type.values()) { - TYPES[type.getCode() & 0xff] = type; - } - } - - //Same as KeyValue constructor. Only used to avoid NPE's when full cell hasn't been initialized. - public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put; - - /******************** fields ************************/ - - protected byte[] block; - //we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler - protected boolean includeMvccVersion; - - protected byte[] rowBuffer; - protected int rowLength; - - protected byte[] familyBuffer; - protected int familyOffset; - protected int familyLength; - - protected byte[] qualifierBuffer;// aligned to the end of the array - protected int qualifierOffset; - protected int qualifierLength; - - protected Long timestamp; - protected Long mvccVersion; - - protected KeyValue.Type type; - - protected int absoluteValueOffset; - protected int valueLength; - - - /********************** Cell methods ******************/ - - /** - * For debugging. Currently creates new KeyValue to utilize its toString() method. 
- */ - @Override - public String toString() { - return getKeyValueString(); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof Cell)) { - return false; - } - //Temporary hack to maintain backwards compatibility with KeyValue.equals - return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj); - - //TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907 - } - - @Override - public int hashCode(){ - //Temporary hack to maintain backwards compatibility with KeyValue.hashCode - //I don't think this is used in any hot code paths - return KeyValueUtil.copyToNewKeyValue(this).hashCode(); - - //TODO return CellComparator.hashCode(this);//see HBASE-6907 - } - - @Override - public int compareTo(Cell other) { - return CellComparator.compareStatic(this, other); - } - - @Override - public long getTimestamp() { - return timestamp; - } - - @Override - public long getMvccVersion() { - if (!includeMvccVersion) { - return 0L; - } - return mvccVersion; - } - - @Override - public int getValueLength() { - return valueLength; - } - - @Override - public byte[] getRowArray() { - return rowBuffer; - } - - @Override - public int getRowOffset() { - return 0; - } - - @Override - public short getRowLength() { - return (short) rowLength; - } - - @Override - public byte[] getFamilyArray() { - return familyBuffer; - } - - @Override - public int getFamilyOffset() { - return familyOffset; - } - - @Override - public byte getFamilyLength() { - return (byte) familyLength; - } - - @Override - public byte[] getQualifierArray() { - return qualifierBuffer; - } - - @Override - public int getQualifierOffset() { - return qualifierOffset; - } - - @Override - public int getQualifierLength() { - return qualifierLength; - } - - @Override - public byte[] getValueArray() { - return block; - } - - @Override - public int getValueOffset() { - return absoluteValueOffset; - } - - @Override - public byte getTypeByte() { - return type.getCode(); - } - - - /************************* helper methods *************************/ - - /** - * Need this separate method so we can call it from subclasses' toString() methods - */ - protected String getKeyValueString(){ - KeyValue kv = KeyValueUtil.copyToNewKeyValue(this); - return kv.toString(); - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java deleted file mode 100644 index 1623876..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.decode.column; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.util.vint.UFIntTool; -import org.apache.hbase.util.vint.UVIntTool; - -@InterfaceAudience.Private -public class ColumnNodeReader { - - /**************** fields ************************/ - - protected PrefixTreeBlockMeta blockMeta; - protected byte[] block; - - protected byte[] columnBuffer; - protected boolean familyVsQualifier; - - protected int offsetIntoBlock; - - protected int tokenOffsetIntoBlock; - protected int tokenLength; - protected int parentStartPosition; - - - /************** construct *************************/ - - public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) { - this.columnBuffer = columnBuffer; - this.familyVsQualifier = familyVsQualifier; - } - - public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { - this.blockMeta = blockMeta; - this.block = block; - } - - - /************* methods *****************************/ - - public void positionAt(int offsetIntoBlock) { - this.offsetIntoBlock = offsetIntoBlock; - tokenLength = UVIntTool.getInt(block, offsetIntoBlock); - tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength); - int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength; - int offsetWidth; - if (familyVsQualifier) { - offsetWidth = blockMeta.getFamilyOffsetWidth(); - } else { - offsetWidth = blockMeta.getQualifierOffsetWidth(); - } - parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth); - } - - public void prependTokenToBuffer(int bufferStartIndex) { - System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength); - } - - public boolean isRoot() { - if (familyVsQualifier) { - return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset(); - } else { - return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset(); - } - } - - - /************** standard methods *********************/ - - @Override - public String toString() { - return super.toString() + "[" + offsetIntoBlock + "]"; - } - - - /****************** get/set ****************************/ - - public int getTokenLength() { - return tokenLength; - } - - public int getParentStartPosition() { - return parentStartPosition; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java deleted file mode 100644 index 593031e..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode.column; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; - -/** - * Position one of these appropriately in the data block and you can call its methods to retrieve - * the family or qualifier at the current position. - */ -@InterfaceAudience.Private -public class ColumnReader { - - /****************** fields *************************/ - - protected PrefixTreeBlockMeta blockMeta; - - protected byte[] columnBuffer; - protected int columnOffset; - protected int columnLength; - protected boolean familyVsQualifier; - - protected ColumnNodeReader columnNodeReader; - - - /******************** construct *******************/ - - public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) { - this.columnBuffer = columnBuffer; - this.familyVsQualifier = familyVsQualifier; - this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier); - } - - public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { - this.blockMeta = blockMeta; - clearColumnBuffer(); - columnNodeReader.initOnBlock(blockMeta, block); - } - - - /********************* methods *******************/ - - public ColumnReader populateBuffer(int offsetIntoColumnData) { - clearColumnBuffer(); - int nextRelativeOffset = offsetIntoColumnData; - while (true) { - int absoluteOffset; - if (familyVsQualifier) { - absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset; - } else { - absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset; - } - columnNodeReader.positionAt(absoluteOffset); - columnOffset -= columnNodeReader.getTokenLength(); - columnLength += columnNodeReader.getTokenLength(); - columnNodeReader.prependTokenToBuffer(columnOffset); - if (columnNodeReader.isRoot()) { - return this; - } - nextRelativeOffset = columnNodeReader.getParentStartPosition(); - } - } - - public byte[] copyBufferToNewArray() {// for testing - byte[] out = new byte[columnLength]; - System.arraycopy(columnBuffer, columnOffset, out, 0, out.length); - return out; - } - - public int getColumnLength() { - return columnLength; - } - - public void clearColumnBuffer() { - columnOffset = columnBuffer.length; - columnLength = 0; - } - - - /****************************** get/set *************************************/ - - public int getColumnOffset() { - return columnOffset; - } - -} - diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java deleted file mode 100644 index 1adc838..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode.row; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.util.vint.UFIntTool; -import org.apache.hbase.util.vint.UVIntTool; - -/** - * Position one of these appropriately in the data block and you can call its methods to retrieve - * information necessary to decode the cells in the row. - */ -@InterfaceAudience.Private -public class RowNodeReader { - - /************* fields ***********************************/ - - protected byte[] block; - protected int offset; - protected int fanIndex; - - protected int numCells; - - protected int tokenOffset; - protected int tokenLength; - protected int fanOffset; - protected int fanOut; - - protected int familyOffsetsOffset; - protected int qualifierOffsetsOffset; - protected int timestampIndexesOffset; - protected int mvccVersionIndexesOffset; - protected int operationTypesOffset; - protected int valueOffsetsOffset; - protected int valueLengthsOffset; - protected int nextNodeOffsetsOffset; - - - /******************* construct **************************/ - - public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) { - this.block = block; - - this.offset = offset; - resetFanIndex(); - - this.tokenLength = UVIntTool.getInt(block, offset); - this.tokenOffset = offset + UVIntTool.numBytes(tokenLength); - - this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength); - this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut); - - this.numCells = UVIntTool.getInt(block, fanOffset + fanOut); - - this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells); - this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth(); - this.timestampIndexesOffset = qualifierOffsetsOffset + numCells - * blockMeta.getQualifierOffsetWidth(); - this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells - * blockMeta.getTimestampIndexWidth(); - this.operationTypesOffset = mvccVersionIndexesOffset + numCells - * blockMeta.getMvccVersionIndexWidth(); - this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth(); - this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth(); - this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth(); - } - - - /******************** methods ****************************/ - - public boolean isLeaf() { - return fanOut == 0; - } - - public boolean isNub() { - return fanOut > 0 && numCells > 0; - } - - public boolean isBranch() { - return fanOut > 0 && numCells == 0; - } - - public boolean hasOccurrences() { - return numCells > 0; - } - - public int getTokenArrayOffset(){ - return tokenOffset; - } - - public int getTokenLength() { - return tokenLength; - } - - public byte getFanByte(int i) { - return block[fanOffset + i]; - } - - /** - * for debugging - */ - protected String getFanByteReadable(int i){ - 
return Bytes.toStringBinary(block, fanOffset + i, 1); - } - - public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getFamilyOffsetWidth(); - int startIndex = familyOffsetsOffset + fIntWidth * index; - return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - } - - public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getQualifierOffsetWidth(); - int startIndex = qualifierOffsetsOffset + fIntWidth * index; - return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - } - - public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getTimestampIndexWidth(); - int startIndex = timestampIndexesOffset + fIntWidth * index; - return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - } - - public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getMvccVersionIndexWidth(); - int startIndex = mvccVersionIndexesOffset + fIntWidth * index; - return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - } - - public int getType(int index, PrefixTreeBlockMeta blockMeta) { - if (blockMeta.isAllSameType()) { - return blockMeta.getAllTypes(); - } - return block[operationTypesOffset + index]; - } - - public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getValueOffsetWidth(); - int startIndex = valueOffsetsOffset + fIntWidth * index; - int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - return offset; - } - - public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getValueLengthWidth(); - int startIndex = valueLengthsOffset + fIntWidth * index; - int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - return length; - } - - public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) { - int fIntWidth = blockMeta.getNextNodeOffsetWidth(); - int startIndex = nextNodeOffsetsOffset + fIntWidth * index; - return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); - } - - public String getBranchNubLeafIndicator() { - if (isNub()) { - return "N"; - } - return isBranch() ? "B" : "L"; - } - - public boolean hasChildren() { - return fanOut > 0; - } - - public int getLastFanIndex() { - return fanOut - 1; - } - - public int getLastCellIndex() { - return numCells - 1; - } - - public int getNumCells() { - return numCells; - } - - public int getFanOut() { - return fanOut; - } - - public byte[] getToken() { - // TODO pass in reusable ByteRange - return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray(); - } - - public int getOffset() { - return offset; - } - - public int whichFanNode(byte searchForByte) { - if( ! 
hasFan()){ - throw new IllegalStateException("This row node has no fan, so can't search it"); - } - int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut, - searchForByte); - if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block - return fanIndexInBlock - fanOffset; - } - return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse - } - - public void resetFanIndex() { - fanIndex = -1;// just the way the logic currently works - } - - public int getFanIndex() { - return fanIndex; - } - - public void setFanIndex(int fanIndex) { - this.fanIndex = fanIndex; - } - - public boolean hasFan(){ - return fanOut > 0; - } - - public boolean hasPreviousFanNodes() { - return fanOut > 0 && fanIndex > 0; - } - - public boolean hasMoreFanNodes() { - return fanIndex < getLastFanIndex(); - } - - public boolean isOnLastFanNode() { - return !hasMoreFanNodes(); - } - - - /*************** standard methods **************************/ - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut)); - sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength)); - sb.append(",numCells:" + numCells); - sb.append(",fanIndex:"+fanIndex); - if(fanIndex>=0){ - sb.append("("+getFanByteReadable(fanIndex)+")"); - } - return sb.toString(); - } -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java deleted file mode 100644 index 5a88fdf..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode.timestamp; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.util.vint.UFIntTool; - -/** - * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block. 
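A brief illustrative note on the fast path (the values are made up; initOnBlock and getMvccVersion below are the real methods):

    // When the mvccVersion index width stored in the block meta is 0 (all mvccVersions in the
    // block were identical), decoding degenerates to a constant lookup of the block minimum:
    MvccVersionDecoder decoder = new MvccVersionDecoder();
    decoder.initOnBlock(blockMeta, block);   // blockMeta and block come from the loaded data block
    long v = decoder.getMvccVersion(7);      // any index returns blockMeta.getMinMvccVersion()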
- */ -@InterfaceAudience.Private -public class MvccVersionDecoder { - - protected PrefixTreeBlockMeta blockMeta; - protected byte[] block; - - - /************** construct ***********************/ - - public MvccVersionDecoder() { - } - - public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { - this.block = block; - this.blockMeta = blockMeta; - } - - - /************** methods *************************/ - - public long getMvccVersion(int index) { - if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical - return blockMeta.getMinMvccVersion(); - } - int startIndex = blockMeta.getAbsoluteMvccVersionOffset() - + blockMeta.getMvccVersionDeltaWidth() * index; - long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth()); - return blockMeta.getMinMvccVersion() + delta; - } -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java deleted file mode 100644 index b3e122a..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.decode.timestamp; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.util.vint.UFIntTool; - -/** - * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block. 
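A worked example of the base-plus-delta decode performed by getLong (illustrative numbers only, not taken from a real block):

    // Suppose blockMeta.getMinTimestamp() == 1000000, getTimestampDeltaWidth() == 2, and the
    // delta section holds the 2-byte values {0, 250, 900} for cells 0..2. getLong(2) then reads
    // delta 900 at blockMeta.getAbsoluteTimestampOffset() + 2 * 2 and returns 1000000 + 900.
    TimestampDecoder decoder = new TimestampDecoder();
    decoder.initOnBlock(blockMeta, block);
    long ts = decoder.getLong(2);            // 1000900 under the assumptions above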
- */ -@InterfaceAudience.Private -public class TimestampDecoder { - - protected PrefixTreeBlockMeta blockMeta; - protected byte[] block; - - - /************** construct ***********************/ - - public TimestampDecoder() { - } - - public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { - this.block = block; - this.blockMeta = blockMeta; - } - - - /************** methods *************************/ - - public long getLong(int index) { - if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical - return blockMeta.getMinTimestamp(); - } - int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth() - * index; - long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth()); - return blockMeta.getMinTimestamp() + delta; - } -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java deleted file mode 100644 index b26607f..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode; - -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the - * ones retrieved from the pool for usage. 
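A minimal checkout/check-in sketch; the caller shown is hypothetical and only checkOut and checkIn are part of this class:

    PrefixTreeEncoder encoder = EncoderFactory.checkOut(outputStream, includeMvccVersion);
    try {
      // append cells to the encoder and flush it here (see PrefixTreeEncoder)
    } finally {
      EncoderFactory.checkIn(encoder);       // return the encoder to the pool for reuse
    }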
- */ -@InterfaceAudience.Private -public class EncoderFactory { - - private static final EncoderPool POOL = new ThreadLocalEncoderPool(); - - - public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) { - return POOL.checkOut(outputStream, includeMvccVersion); - } - - public static void checkIn(PrefixTreeEncoder encoder) { - POOL.checkIn(encoder); - } - - - /**************************** helper ******************************/ - - protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder, - OutputStream outputStream, boolean includeMvccVersion) { - PrefixTreeEncoder ret = encoder; - if (encoder == null) { - ret = new PrefixTreeEncoder(outputStream, includeMvccVersion); - } - ret.reset(outputStream, includeMvccVersion); - return ret; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java deleted file mode 100644 index ca73f91..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode; - -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; - - -@InterfaceAudience.Private -public interface EncoderPool { - - PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion); - void checkIn(PrefixTreeEncoder encoder); - -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java deleted file mode 100644 index 46cb707..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode; - -import java.io.IOException; -import java.io.OutputStream; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.KeyValueUtil; -import org.apache.hadoop.hbase.io.CellOutputStream; -import org.apache.hadoop.hbase.util.ArrayUtils; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.io.WritableUtils; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; -import org.apache.hbase.codec.prefixtree.encode.other.CellTypeEncoder; -import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder; -import org.apache.hbase.codec.prefixtree.encode.row.RowSectionWriter; -import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; -import org.apache.hbase.util.byterange.ByteRangeSet; -import org.apache.hbase.util.byterange.impl.ByteRangeHashSet; -import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; -import org.apache.hbase.util.vint.UFIntTool; - -/** - * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are - * added they are completely copied into the various encoding structures. This is important because - * usually the cells being fed in during compactions will be transient.
    - *
    - * Usage:
    - * 1) constructor
- * 2) append cells in sorted order: write(Cell cell)
- * 3) flush()
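A sketch of those three steps (hypothetical caller; assumes the cells arrive already sorted):

    PrefixTreeEncoder encoder = new PrefixTreeEncoder(outputStream, includeMvccVersion);
    for (Cell cell : sortedCells) {          // sortedCells is assumed to already be in sort order
      encoder.write(cell);
    }
    encoder.flush();                         // step 3: flush once all cells have been appended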
    - */ -@InterfaceAudience.Private -public class PrefixTreeEncoder implements CellOutputStream { - - /**************** static ************************/ - - protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class); - - //future-proof where HBase supports multiple families in a data block. - public static final boolean MULITPLE_FAMILIES_POSSIBLE = false; - - private static final boolean USE_HASH_COLUMN_SORTER = true; - private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256; - private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024; - - - /**************** fields *************************/ - - protected long numResets = 0L; - - protected OutputStream outputStream; - - /* - * Cannot change during a single block's encoding. If false, then substitute incoming Cell's - * mvccVersion with zero and write out the block as usual. - */ - protected boolean includeMvccVersion; - - /* - * reusable ByteRanges used for communicating with the sorters/compilers - */ - protected ByteRange rowRange; - protected ByteRange familyRange; - protected ByteRange qualifierRange; - - /* - * incoming Cell fields are copied into these arrays - */ - protected long[] timestamps; - protected long[] mvccVersions; - protected byte[] typeBytes; - protected int[] valueOffsets; - protected byte[] values; - - protected PrefixTreeBlockMeta blockMeta; - - /* - * Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and - * compile before flushing. - */ - protected LongEncoder timestampEncoder; - protected LongEncoder mvccVersionEncoder; - protected CellTypeEncoder cellTypeEncoder; - - /* - * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them - * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by - * comparing only with the previous row key, families and qualifiers can arrive in unsorted order - * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them. - */ - protected ByteRangeSet familyDeduplicator; - protected ByteRangeSet qualifierDeduplicator; - - /* - * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory - * trie structure with nodes connected by memory pointers (not serializable yet). - */ - protected Tokenizer rowTokenizer; - protected Tokenizer familyTokenizer; - protected Tokenizer qualifierTokenizer; - - /* - * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write - * all information to an output stream of bytes that can be stored on disk. - */ - protected RowSectionWriter rowWriter; - protected ColumnSectionWriter familyWriter; - protected ColumnSectionWriter qualifierWriter; - - /* - * Integers used for counting cells and bytes. We keep track of the size of the Cells as if they - * were full KeyValues because some parts of HBase like to know the "unencoded size". 
- */ - protected int totalCells = 0; - protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues - protected int totalValueBytes = 0; - protected int maxValueLength = 0; - protected int totalBytes = 0;// - - - /***************** construct ***********************/ - - public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) { - // used during cell accumulation - this.blockMeta = new PrefixTreeBlockMeta(); - this.rowRange = new ByteRange(); - this.familyRange = new ByteRange(); - this.qualifierRange = new ByteRange(); - this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES]; - this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES]; - this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES]; - this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES]; - this.values = new byte[VALUE_BUFFER_INIT_SIZE]; - - // used during compilation - this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() - : new ByteRangeTreeSet(); - this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() - : new ByteRangeTreeSet(); - this.timestampEncoder = new LongEncoder(); - this.mvccVersionEncoder = new LongEncoder(); - this.cellTypeEncoder = new CellTypeEncoder(); - this.rowTokenizer = new Tokenizer(); - this.familyTokenizer = new Tokenizer(); - this.qualifierTokenizer = new Tokenizer(); - this.rowWriter = new RowSectionWriter(); - this.familyWriter = new ColumnSectionWriter(); - this.qualifierWriter = new ColumnSectionWriter(); - - reset(outputStream, includeMvccVersion); - } - - public void reset(OutputStream outputStream, boolean includeMvccVersion) { - ++numResets; - this.includeMvccVersion = includeMvccVersion; - this.outputStream = outputStream; - valueOffsets[0] = 0; - - familyDeduplicator.reset(); - qualifierDeduplicator.reset(); - rowTokenizer.reset(); - timestampEncoder.reset(); - mvccVersionEncoder.reset(); - cellTypeEncoder.reset(); - familyTokenizer.reset(); - qualifierTokenizer.reset(); - rowWriter.reset(); - familyWriter.reset(); - qualifierWriter.reset(); - - totalCells = 0; - totalUnencodedBytes = 0; - totalValueBytes = 0; - maxValueLength = 0; - totalBytes = 0; - } - - /** - * Check that the arrays used to hold cell fragments are large enough for the cell that is being - * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the - * first few block encodings but should stabilize quickly. - */ - protected void ensurePerCellCapacities() { - int currentCapacity = valueOffsets.length; - int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe - if (neededCapacity < currentCapacity) { - return; - } - - int padding = neededCapacity;//this will double the array size - timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding); - mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding); - typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding); - valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding); - } - - /******************** CellOutputStream methods *************************/ - - /** - * Note: Unused until support is added to the scanner/heap - *
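`ensurePerCellCapacities()` above relies on `ArrayUtils.growIfNecessary`, an HBase utility; a stand-alone sketch of the grow-with-padding behavior it depends on (not the actual implementation) might look like this:

```java
import java.util.Arrays;

// Sketch: if the array is shorter than minLength, reallocate it with extra padding so
// repeated growth is amortized; passing padding == minLength roughly doubles the array.
static long[] growIfNecessary(long[] array, int minLength, int padding) {
  if (array.length >= minLength) {
    return array;               // already large enough, reuse as-is
  }
  return Arrays.copyOf(array, minLength + padding);
}
```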

    - * The following methods are optimized versions of write(Cell cell). The result should be - * identical; however, the implementation may be able to execute them much more efficiently because - * it does not need to compare the unchanged fields with the previous cell's. - *

    - * Consider the benefits during compaction when paired with a CellScanner that is also aware of - * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells - * to the write(Cell cell) method. - *
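A row-aware caller might route cells roughly as follows; `startsNewRow` is a hypothetical helper standing in for whatever row-boundary signal the CellScanner provides, and `encoder` is assumed to be a PrefixTreeEncoder:

```java
// Sketch only: send cells that repeat the previous row key down the cheaper path.
for (Cell cell : sortedCells) {
  if (startsNewRow(cell)) {
    encoder.write(cell);               // full path: the row key is compared/tokenized
  } else {
    encoder.writeWithRepeatRow(cell);  // skips the row comparison, bumps the row's cell count
  }
}
```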

    - * The savings of skipping duplicate row detection are significant with long row keys. A - * DataBlockEncoder may store a row key once in combination with a count of how many cells are in - * the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment - * of the counter, and that is for every cell in the row. - */ - - /** - * Add a Cell to the output stream but repeat the previous row. - */ - //@Override - public void writeWithRepeatRow(Cell cell) { - ensurePerCellCapacities();//can we optimize away some of this? - - //save a relatively expensive row comparison, incrementing the row's counter instead - rowTokenizer.incrementNumOccurrencesOfLatestValue(); - addFamilyPart(cell); - addQualifierPart(cell); - addAfterRowFamilyQualifier(cell); - } - - - @Override - public void write(Cell cell) { - ensurePerCellCapacities(); - - rowTokenizer.addSorted(CellUtil.fillRowRange(cell, rowRange)); - addFamilyPart(cell); - addQualifierPart(cell); - addAfterRowFamilyQualifier(cell); - } - - - /***************** internal add methods ************************/ - - private void addAfterRowFamilyQualifier(Cell cell){ - // timestamps - timestamps[totalCells] = cell.getTimestamp(); - timestampEncoder.add(cell.getTimestamp()); - - // memstore timestamps - if (includeMvccVersion) { - mvccVersions[totalCells] = cell.getMvccVersion(); - mvccVersionEncoder.add(cell.getMvccVersion()); - totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion()); - }else{ - //must overwrite in case there was a previous version in this array slot - mvccVersions[totalCells] = 0L; - if(totalCells == 0){//only need to do this for the first cell added - mvccVersionEncoder.add(0L); - } - //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled - } - - // types - typeBytes[totalCells] = cell.getTypeByte(); - cellTypeEncoder.add(cell.getTypeByte()); - - // values - totalValueBytes += cell.getValueLength(); - // double the array each time we run out of space - values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes); - CellUtil.copyValueTo(cell, values, valueOffsets[totalCells]); - if (cell.getValueLength() > maxValueLength) { - maxValueLength = cell.getValueLength(); - } - valueOffsets[totalCells + 1] = totalValueBytes; - - // general - totalUnencodedBytes += KeyValueUtil.length(cell); - ++totalCells; - } - - private void addFamilyPart(Cell cell) { - if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) { - CellUtil.fillFamilyRange(cell, familyRange); - familyDeduplicator.add(familyRange); - } - } - - private void addQualifierPart(Cell cell) { - CellUtil.fillQualifierRange(cell, qualifierRange); - qualifierDeduplicator.add(qualifierRange); - } - - - /****************** compiling/flushing ********************/ - - /** - * Expensive method. The second half of the encoding work happens here. - * - * Take all the separate accumulated data structures and turn them into a single stream of bytes - * which is written to the outputStream. - */ - @Override - public void flush() throws IOException { - compile(); - - // do the actual flushing to the output stream. Order matters. - blockMeta.writeVariableBytesToOutputStream(outputStream); - rowWriter.writeBytes(outputStream); - familyWriter.writeBytes(outputStream); - qualifierWriter.writeBytes(outputStream); - timestampEncoder.writeBytes(outputStream); - mvccVersionEncoder.writeBytes(outputStream); - //CellType bytes are in the row nodes. 
there is no additional type section - outputStream.write(values, 0, totalValueBytes); - } - - /** - * Now that all the cells have been added, do the work to reduce them to a series of byte[] - * fragments that are ready to be written to the output stream. - */ - protected void compile(){ - blockMeta.setNumKeyValueBytes(totalUnencodedBytes); - int lastValueOffset = valueOffsets[totalCells]; - blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset)); - blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength)); - blockMeta.setNumValueBytes(totalValueBytes); - totalBytes += totalValueBytes; - - //these compile methods will add to totalBytes - compileTypes(); - compileMvccVersions(); - compileTimestamps(); - compileQualifiers(); - compileFamilies(); - compileRows(); - - int numMetaBytes = blockMeta.calculateNumMetaBytes(); - blockMeta.setNumMetaBytes(numMetaBytes); - totalBytes += numMetaBytes; - } - - /** - * The following "compile" methods do any intermediate work necessary to transform the cell - * fragments collected during the writing phase into structures that are ready to write to the - * outputStream. - *
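`compile()` above sizes several fixed-width fields with `UFIntTool.numBytes(...)`. A stand-in sketch of that calculation, under the assumption that it simply returns the smallest byte count whose unsigned range covers the largest value to be stored:

```java
// Sketch, not the real UFIntTool: 1 byte holds 0..255, 2 bytes hold 0..65535, and so on.
static int numBytes(long maxValue) {
  int bytes = 1;
  while (maxValue > (1L << (8 * bytes)) - 1) {
    ++bytes;
  }
  return bytes;
}
// numBytes(255) == 1, numBytes(256) == 2, numBytes(70_000) == 3
```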

    - * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion. - */ - - protected void compileTypes() { - blockMeta.setAllSameType(cellTypeEncoder.areAllSameType()); - if(cellTypeEncoder.areAllSameType()){ - blockMeta.setAllTypes(cellTypeEncoder.getOnlyType()); - } - } - - protected void compileMvccVersions() { - mvccVersionEncoder.compile(); - blockMeta.setMvccVersionFields(mvccVersionEncoder); - int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength(); - totalBytes += numMvccVersionBytes; - } - - protected void compileTimestamps() { - timestampEncoder.compile(); - blockMeta.setTimestampFields(timestampEncoder); - int numTimestampBytes = timestampEncoder.getOutputArrayLength(); - totalBytes += numTimestampBytes; - } - - protected void compileQualifiers() { - blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size()); - qualifierDeduplicator.compile(); - qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges()); - qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false); - qualifierWriter.compile(); - int numQualifierBytes = qualifierWriter.getNumBytes(); - blockMeta.setNumQualifierBytes(numQualifierBytes); - totalBytes += numQualifierBytes; - } - - protected void compileFamilies() { - blockMeta.setNumUniqueFamilies(familyDeduplicator.size()); - familyDeduplicator.compile(); - familyTokenizer.addAll(familyDeduplicator.getSortedRanges()); - familyWriter.reconstruct(blockMeta, familyTokenizer, true); - familyWriter.compile(); - int numFamilyBytes = familyWriter.getNumBytes(); - blockMeta.setNumFamilyBytes(numFamilyBytes); - totalBytes += numFamilyBytes; - } - - protected void compileRows() { - rowWriter.reconstruct(this); - rowWriter.compile(); - int numRowBytes = rowWriter.getNumBytes(); - blockMeta.setNumRowBytes(numRowBytes); - blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth()); - totalBytes += numRowBytes; - } - - /********************* convenience getters ********************************/ - - public long getValueOffset(int index) { - return valueOffsets[index]; - } - - public int getValueLength(int index) { - return (int) (valueOffsets[index + 1] - valueOffsets[index]); - } - - /************************* get/set *************************************/ - - public PrefixTreeBlockMeta getBlockMeta() { - return blockMeta; - } - - public Tokenizer getRowTokenizer() { - return rowTokenizer; - } - - public LongEncoder getTimestampEncoder() { - return timestampEncoder; - } - - public int getTotalBytes() { - return totalBytes; - } - - public long[] getTimestamps() { - return timestamps; - } - - public long[] getMvccVersions() { - return mvccVersions; - } - - public byte[] getTypeBytes() { - return typeBytes; - } - - public LongEncoder getMvccVersionEncoder() { - return mvccVersionEncoder; - } - - public ByteRangeSet getFamilySorter() { - return familyDeduplicator; - } - - public ByteRangeSet getQualifierSorter() { - return qualifierDeduplicator; - } - - public ColumnSectionWriter getFamilyWriter() { - return familyWriter; - } - - public ColumnSectionWriter getQualifierWriter() { - return qualifierWriter; - } - - public RowSectionWriter getRowWriter() { - return rowWriter; - } - - public ByteRange getValueByteRange() { - return new ByteRange(values, 0, totalValueBytes); - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java deleted file mode 100644 index 
3f9a00b..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode; - -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; - - -/** - * Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and - * would be more garbage than the data in the block. A new encoder is needed for each block in - * a flush, compaction, RPC response, etc. - * - * It is not a pool in the traditional sense, but implements the semantics of a traditional pool - * via ThreadLocals to avoid sharing between threads. Sharing between threads would not be - * very expensive given that it's accessed per-block, but this is just as easy. - * - * This pool implementation assumes there is a one-to-one mapping between a single thread and a - * single flush or compaction. - */ -@InterfaceAudience.Private -public class ThreadLocalEncoderPool implements EncoderPool{ - - private static final ThreadLocal ENCODER - = new ThreadLocal(); - - /** - * Get the encoder attached to the current ThreadLocal, or create a new one and attach it to the - * current thread. - */ - @Override - public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) { - PrefixTreeEncoder builder = ENCODER.get(); - builder = EncoderFactory.prepareEncoder(builder, os, includeMvccVersion); - ENCODER.set(builder); - return builder; - } - - @Override - public void checkIn(PrefixTreeEncoder encoder) { - // attached to thread on checkOut, so shouldn't need to do anything here - - // do we need to worry about detaching encoders from compaction threads or are the same threads - // used over and over - } - -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java deleted file mode 100644 index b84e15a..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.column; - -import java.io.IOException; -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Strings; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.util.vint.UFIntTool; -import org.apache.hbase.util.vint.UVIntTool; - -/** - * Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly. - * The family and qualifier sections of the data block are made of 1 or more of these nodes. - *

    - * Each node is composed of 3 sections:
    - *

  • tokenLength: UVInt (normally 1 byte) indicating the number of token bytes - *
  • token[]: the actual token bytes - *
  • parentStartPosition: the offset of the next node from the start of the family or qualifier - * section - */ -@InterfaceAudience.Private -public class ColumnNodeWriter{ - - /************* fields ****************************/ - - protected TokenizerNode builderNode; - protected PrefixTreeBlockMeta blockMeta; - - protected boolean familyVsQualifier; - - protected int tokenLength; - protected byte[] token; - protected int parentStartPosition; - - - /*************** construct **************************/ - - public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode, - boolean familyVsQualifier) { - this.blockMeta = blockMeta; - this.builderNode = builderNode; - this.familyVsQualifier = familyVsQualifier; - calculateTokenLength(); - } - - - /************* methods *******************************/ - - public boolean isRoot() { - return parentStartPosition == 0; - } - - private void calculateTokenLength() { - tokenLength = builderNode.getTokenLength(); - token = new byte[tokenLength]; - } - - /** - * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a - * placeholder. - * @param offsetWidthPlaceholder the placeholder - * @return node width - */ - public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) { - int width = 0; - width += UVIntTool.numBytes(tokenLength); - width += token.length; - width += offsetWidthPlaceholder; - return width; - } - - public void writeBytes(OutputStream os) throws IOException { - int parentOffsetWidth; - if (familyVsQualifier) { - parentOffsetWidth = blockMeta.getFamilyOffsetWidth(); - } else { - parentOffsetWidth = blockMeta.getQualifierOffsetWidth(); - } - UVIntTool.writeBytes(tokenLength, os); - os.write(token); - UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os); - } - - public void setTokenBytes(ByteRange source) { - source.deepCopySubRangeTo(0, tokenLength, token, 0); - } - - - /****************** standard methods ************************/ - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ","); - sb.append("["); - sb.append(Bytes.toString(token)); - sb.append("]->"); - sb.append(parentStartPosition); - return sb.toString(); - } - - - /************************** get/set ***********************/ - - public void setParentStartPosition(int parentStartPosition) { - this.parentStartPosition = parentStartPosition; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java deleted file mode 100644 index 3d2457d..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.column; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.util.vint.UFIntTool; - -import com.google.common.collect.Lists; - -/** - * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family - * section is written after the row section, and qualifier section after family section. - *

    - * The family and qualifier tries, or "column tries", are structured differently than the row trie. - * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and - * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a - * row, the row trie contains a list of offsets into the column sections (along with pointers to - * timestamps and other per-cell fields). These offsets point to the last column node/token that - * comprises the column name. To assemble the column name, the trie is traversed in reverse (right - * to left), with the rightmost tokens pointing to the start of their "parent" node which is the - * node to the left. - *
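A greatly simplified sketch of the reverse reassembly described above. It assumes a toy node layout of [tokenLength: 1 byte][token bytes][parent offset: 2 bytes, big-endian] with the root node at offset 0; the real block derives these widths from the block meta:

```java
import java.io.ByteArrayOutputStream;
import java.util.ArrayDeque;
import java.util.Arrays;

// Walk child -> parent, prepending each token, then concatenate root-first.
static byte[] reassembleColumnName(byte[] section, int lastNodeOffset) {
  ArrayDeque<byte[]> tokensRootFirst = new ArrayDeque<>();
  int offset = lastNodeOffset;
  while (true) {
    int tokenLength = section[offset] & 0xFF;
    tokensRootFirst.addFirst(Arrays.copyOfRange(section, offset + 1, offset + 1 + tokenLength));
    if (offset == 0) {
      break;                                          // reached the root node
    }
    int parentPos = offset + 1 + tokenLength;
    offset = ((section[parentPos] & 0xFF) << 8) | (section[parentPos + 1] & 0xFF);
  }
  ByteArrayOutputStream name = new ByteArrayOutputStream();
  for (byte[] token : tokensRootFirst) {
    name.write(token, 0, token.length);
  }
  return name.toByteArray();
}
```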

    - * This choice was made to reduce the size of the column trie by storing the minimum amount of - * offset data. As a result, to find a specific qualifier within a row, you must do a binary search - * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might - * encode the columns in both a forward and reverse trie, which would convert binary searches into - * more efficient trie searches which would be beneficial for wide rows. - */ -@InterfaceAudience.Private -public class ColumnSectionWriter { - - public static final int EXPECTED_NUBS_PLUS_LEAVES = 100; - - /****************** fields ****************************/ - - private PrefixTreeBlockMeta blockMeta; - - private boolean familyVsQualifier; - private Tokenizer tokenizer; - private int numBytes = 0; - private ArrayList nonLeaves; - private ArrayList leaves; - private ArrayList allNodes; - private ArrayList columnNodeWriters; - private List outputArrayOffsets; - - - /*********************** construct *********************/ - - public ColumnSectionWriter() { - this.nonLeaves = Lists.newArrayList(); - this.leaves = Lists.newArrayList(); - this.outputArrayOffsets = Lists.newArrayList(); - } - - public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder, - boolean familyVsQualifier) { - this();// init collections - reconstruct(blockMeta, builder, familyVsQualifier); - } - - public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder, - boolean familyVsQualifier) { - this.blockMeta = blockMeta; - this.tokenizer = builder; - this.familyVsQualifier = familyVsQualifier; - } - - public void reset() { - numBytes = 0; - nonLeaves.clear(); - leaves.clear(); - outputArrayOffsets.clear(); - } - - - /****************** methods *******************************/ - - public ColumnSectionWriter compile() { - if (familyVsQualifier) { - // do nothing. 
max family length fixed at Byte.MAX_VALUE - } else { - blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength()); - } - - tokenizer.setNodeFirstInsertionIndexes(); - - tokenizer.appendNodes(nonLeaves, true, false); - - tokenizer.appendNodes(leaves, false, true); - - allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size()); - allNodes.addAll(nonLeaves); - allNodes.addAll(leaves); - - columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes)); - for (int i = 0; i < allNodes.size(); ++i) { - TokenizerNode node = allNodes.get(i); - columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier)); - } - - // leaf widths are known at this point, so add them up - int totalBytesWithoutOffsets = 0; - for (int i = allNodes.size() - 1; i >= 0; --i) { - ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i); - // leaves store all but their first token byte - totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0); - } - - // figure out how wide our offset FInts are - int parentOffsetWidth = 0; - while (true) { - ++parentOffsetWidth; - int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size(); - if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) { - numBytes = numBytesFinder; - break; - }// it fits - } - if (familyVsQualifier) { - blockMeta.setFamilyOffsetWidth(parentOffsetWidth); - } else { - blockMeta.setQualifierOffsetWidth(parentOffsetWidth); - } - - int forwardIndex = 0; - for (int i = 0; i < allNodes.size(); ++i) { - TokenizerNode node = allNodes.get(i); - ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i); - int fullNodeWidth = columnNodeWriter - .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth); - node.setOutputArrayOffset(forwardIndex); - columnNodeWriter.setTokenBytes(node.getToken()); - if (node.isRoot()) { - columnNodeWriter.setParentStartPosition(0); - } else { - columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset()); - } - forwardIndex += fullNodeWidth; - } - - tokenizer.appendOutputArrayOffsets(outputArrayOffsets); - - return this; - } - - public void writeBytes(OutputStream os) throws IOException { - for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) { - columnNodeWriter.writeBytes(os); - } - } - - - /************* get/set **************************/ - - public ArrayList getColumnNodeWriters() { - return columnNodeWriters; - } - - public int getNumBytes() { - return numBytes; - } - - public int getOutputArrayOffset(int sortedIndex) { - return outputArrayOffsets.get(sortedIndex); - } - - public ArrayList getNonLeaves() { - return nonLeaves; - } - - public ArrayList getLeaves() { - return leaves; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java deleted file mode 100644 index 963c307..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.other; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each - * KV. If(allSameType) during conversion to byte[], then we can store the "onlyType" in blockMeta, - * therefore not repeating it for each cell and saving 1 byte per cell. - */ -@InterfaceAudience.Private -public class CellTypeEncoder { - - /************* fields *********************/ - - protected boolean pendingFirstType = true; - protected boolean allSameType = true; - protected byte onlyType; - - - /************* construct *********************/ - - public void reset() { - pendingFirstType = true; - allSameType = true; - } - - - /************* methods *************************/ - - public void add(byte type) { - if (pendingFirstType) { - onlyType = type; - pendingFirstType = false; - } else if (onlyType != type) { - allSameType = false; - } - } - - - /**************** get/set **************************/ - - public boolean areAllSameType() { - return allSameType; - } - - public byte getOnlyType() { - return onlyType; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java deleted file mode 100644 index baf20f6..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.other; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.HashSet; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ArrayUtils; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hbase.util.vint.UFIntTool; - -import com.google.common.base.Joiner; - -/** - * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a - * collection of Cells. - * - * 1. add longs to a HashSet for fast de-duplication - * 2. keep track of the min and max - * 3. copy all values to a new long[] - * 4. 
Collections.sort the long[] - * 5. calculate maxDelta = max - min - * 6. determine FInt width based on maxDelta - * 7. PrefixTreeEncoder binary searches to find index of each value - */ -@InterfaceAudience.Private -public class LongEncoder { - - /****************** fields ****************************/ - - protected HashSet uniqueValues; - protected long[] sortedUniqueValues; - protected long min, max, maxDelta; - - protected int bytesPerDelta; - protected int bytesPerIndex; - protected int totalCompressedBytes; - - - /****************** construct ****************************/ - - public LongEncoder() { - this.uniqueValues = new HashSet(); - } - - public void reset() { - uniqueValues.clear(); - sortedUniqueValues = null; - min = Long.MAX_VALUE; - max = Long.MIN_VALUE; - maxDelta = Long.MIN_VALUE; - bytesPerIndex = 0; - bytesPerDelta = 0; - totalCompressedBytes = 0; - } - - - /************* methods ***************************/ - - public void add(long timestamp) { - uniqueValues.add(timestamp); - } - - public LongEncoder compile() { - int numUnique = uniqueValues.size(); - if (numUnique == 1) { - min = CollectionUtils.getFirst(uniqueValues); - sortedUniqueValues = new long[] { min }; - return this; - } - - sortedUniqueValues = new long[numUnique]; - int lastIndex = -1; - for (long value : uniqueValues) { - sortedUniqueValues[++lastIndex] = value; - } - Arrays.sort(sortedUniqueValues); - min = ArrayUtils.getFirst(sortedUniqueValues); - max = ArrayUtils.getLast(sortedUniqueValues); - maxDelta = max - min; - if (maxDelta > 0) { - bytesPerDelta = UFIntTool.numBytes(maxDelta); - } else { - bytesPerDelta = 0; - } - - int maxIndex = numUnique - 1; - bytesPerIndex = UFIntTool.numBytes(maxIndex); - - totalCompressedBytes = numUnique * bytesPerDelta; - - return this; - } - - public long getDelta(int index) { - if (sortedUniqueValues.length == 0) { - return 0; - } - return sortedUniqueValues[index] - min; - } - - public int getIndex(long value) { - // should always find an exact match - return Arrays.binarySearch(sortedUniqueValues, value); - } - - public void writeBytes(OutputStream os) throws IOException { - for (int i = 0; i < sortedUniqueValues.length; ++i) { - long delta = sortedUniqueValues[i] - min; - UFIntTool.writeBytes(bytesPerDelta, delta, os); - } - } - - //convenience method for tests - public byte[] getByteArray() throws IOException{ - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - writeBytes(baos); - return baos.toByteArray(); - } - - public int getOutputArrayLength() { - return sortedUniqueValues.length * bytesPerDelta; - } - - public int getNumUniqueValues() { - return sortedUniqueValues.length; - } - - - /******************* Object methods **********************/ - - @Override - public String toString() { - if (ArrayUtils.isEmpty(sortedUniqueValues)) { - return "[]"; - } - return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]"; - } - - - /******************** get/set **************************/ - - public long getMin() { - return min; - } - - public int getBytesPerDelta() { - return bytesPerDelta; - } - - public int getBytesPerIndex() { - return bytesPerIndex; - } - - public int getTotalCompressedBytes() { - return totalCompressedBytes; - } - - public long[] getSortedUniqueTimestamps() { - return sortedUniqueValues; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java deleted file mode 
100644 index 748a7f6..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.row; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ByteRangeTool; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.util.vint.UFIntTool; -import org.apache.hbase.util.vint.UVIntTool; - -/** - * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf. - * Please see the write() method for the order in which data is written. 
- */ -@InterfaceAudience.Private -public class RowNodeWriter{ - protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class); - - /********************* fields ******************************/ - - protected PrefixTreeEncoder prefixTreeEncoder; - protected PrefixTreeBlockMeta blockMeta; - protected TokenizerNode tokenizerNode; - - protected int tokenWidth; - protected int fanOut; - protected int numCells; - - protected int width; - - - /*********************** construct *************************/ - - public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) { - reconstruct(keyValueBuilder, tokenizerNode); - } - - public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) { - this.prefixTreeEncoder = prefixTreeEncoder; - reset(tokenizerNode); - } - - public void reset(TokenizerNode node) { - this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks - this.tokenizerNode = node; - this.tokenWidth = 0; - this.fanOut = 0; - this.numCells = 0; - this.width = 0; - calculateOffsetsAndLengths(); - } - - - /********************* methods ****************************/ - - protected void calculateOffsetsAndLengths(){ - tokenWidth = tokenizerNode.getTokenLength(); - if(!tokenizerNode.isRoot()){ - --tokenWidth;//root has no parent - } - fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren()); - numCells = tokenizerNode.getNumOccurrences(); - } - - public int calculateWidth(){ - calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth()); - return width; - } - - public int calculateWidthOverrideOffsetWidth(int offsetWidth){ - width = 0; - width += UVIntTool.numBytes(tokenWidth); - width += tokenWidth; - - width += UVIntTool.numBytes(fanOut); - width += fanOut; - - width += UVIntTool.numBytes(numCells); - - if(tokenizerNode.hasOccurrences()){ - int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth() - + blockMeta.getQualifierOffsetWidth() - + blockMeta.getTimestampIndexWidth() - + blockMeta.getMvccVersionIndexWidth() - + blockMeta.getKeyValueTypeWidth() - + blockMeta.getValueOffsetWidth() - + blockMeta.getValueLengthWidth(); - width += numCells * fixedBytesPerCell; - } - - if( ! tokenizerNode.isLeaf()){ - width += fanOut * offsetWidth; - } - - return width; - } - - - /*********************** writing the compiled structure to the OutputStream ***************/ - - public void write(OutputStream os) throws IOException{ - //info about this row trie node - writeRowToken(os); - writeFan(os); - writeNumCells(os); - - //UFInt indexes and offsets for each cell in the row (if nub or leaf) - writeFamilyNodeOffsets(os); - writeQualifierNodeOffsets(os); - writeTimestampIndexes(os); - writeMvccVersionIndexes(os); - writeCellTypes(os); - writeValueOffsets(os); - writeValueLengths(os); - - //offsets to the children of this row trie node (if branch or nub) - writeNextRowTrieNodeOffsets(os); - } - - - /** - * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3 - * fields can reproduce all the row keys that compose the block. - */ - - /** - * UVInt: tokenWidth - * bytes: token - */ - protected void writeRowToken(OutputStream os) throws IOException { - UVIntTool.writeBytes(tokenWidth, os); - int tokenStartIndex = tokenizerNode.isRoot() ? 
0 : 1; - ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex); - } - - /** - * UVInt: numFanBytes/fanOut - * bytes: each fan byte - */ - public void writeFan(OutputStream os) throws IOException { - UVIntTool.writeBytes(fanOut, os); - if (fanOut <= 0) { - return; - } - ArrayList children = tokenizerNode.getChildren(); - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - os.write(child.getToken().get(0));// first byte of each child's token - } - } - - /** - * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes - */ - protected void writeNumCells(OutputStream os) throws IOException { - UVIntTool.writeBytes(numCells, os); - } - - - /** - * The following methods write data for each cell in the row, mostly consisting of indexes or - * offsets into the timestamp/column data structures that are written in the middle of the block. - * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary - * search of a particular column/timestamp combination. - *
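For example, a cell's timestamp is not written per cell; the sorted unique timestamps are stored once for the block and each cell stores only a small index into that array. This mirrors `LongEncoder.getIndex`, which performs a binary search; the data below is illustrative:

```java
// Toy data: the block's de-duplicated, sorted timestamps plus one cell's lookup.
long[] sortedUniqueTimestamps = {1000L, 1005L, 1050L};
int timestampIndex = java.util.Arrays.binarySearch(sortedUniqueTimestamps, 1005L);  // == 1
// The row node then writes timestampIndex as a UFInt of blockMeta.getTimestampIndexWidth() bytes.
```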

    - * Branch nodes will not have any data in these sections. - */ - - protected void writeFamilyNodeOffsets(OutputStream os) throws IOException { - if (blockMeta.getFamilyOffsetWidth() <= 0) { - return; - } - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode - .getFirstInsertionIndex() + i : 0; - int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId( - cellInsertionIndex); - int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset( - sortedIndex); - UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os); - } - } - - protected void writeQualifierNodeOffsets(OutputStream os) throws IOException { - if (blockMeta.getQualifierOffsetWidth() <= 0) { - return; - } - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; - int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId( - cellInsertionIndex); - int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset( - sortedIndex); - UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os); - } - } - - protected void writeTimestampIndexes(OutputStream os) throws IOException { - if (blockMeta.getTimestampIndexWidth() <= 0) { - return; - } - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; - long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex]; - int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp); - UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os); - } - } - - protected void writeMvccVersionIndexes(OutputStream os) throws IOException { - if (blockMeta.getMvccVersionIndexWidth() <= 0) { - return; - } - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; - long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex]; - int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion); - UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os); - } - } - - protected void writeCellTypes(OutputStream os) throws IOException { - if (blockMeta.isAllSameType()) { - return; - } - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; - os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]); - } - } - - protected void writeValueOffsets(OutputStream os) throws IOException { - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; - long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex); - UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os); - } - } - - protected void writeValueLengths(OutputStream os) throws IOException { - for (int i = 0; i < numCells; ++i) { - int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; - int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex); - UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os); - } - } - - - /** - * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes. 
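The offset and index fields in this class are all emitted through `UFIntTool.writeBytes(width, value, os)`. A stand-in sketch of writing a value as a fixed-width integer (the byte order here is an assumption, chosen as big-endian for illustration):

```java
import java.io.IOException;
import java.io.OutputStream;

// Sketch: emit exactly `width` bytes for `value`, most significant byte first.
static void writeFixedWidth(int width, long value, OutputStream os) throws IOException {
  for (int shift = 8 * (width - 1); shift >= 0; shift -= 8) {
    os.write((int) (value >>> shift) & 0xFF);
  }
}
```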
- */ - protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException { - ArrayList children = tokenizerNode.getChildren(); - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex(); - UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os); - } - } -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java deleted file mode 100644 index f1dca8d..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.row; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.util.vint.UFIntTool; - -import com.google.common.collect.Lists; - -/** - * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row - * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie - * also contains references to offsets in the other sections of the data block that enable the - * decoder to match a row key with its qualifier, timestamp, type, value, etc. - *

    - * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the - * internals of each row node. - */ -@InterfaceAudience.Private -public class RowSectionWriter { - - /***************** fields **************************/ - - protected PrefixTreeEncoder prefixTreeEncoder; - - protected PrefixTreeBlockMeta blockMeta; - - protected int numBytes; - - protected ArrayList nonLeaves; - protected ArrayList leaves; - - protected ArrayList leafWriters; - protected ArrayList nonLeafWriters; - - protected int numLeafWriters; - protected int numNonLeafWriters; - - - /********************* construct **********************/ - - public RowSectionWriter() { - this.nonLeaves = Lists.newArrayList(); - this.leaves = Lists.newArrayList(); - this.leafWriters = Lists.newArrayList(); - this.nonLeafWriters = Lists.newArrayList(); - } - - public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) { - reconstruct(prefixTreeEncoder); - } - - public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) { - this.prefixTreeEncoder = prefixTreeEncoder; - this.blockMeta = prefixTreeEncoder.getBlockMeta(); - reset(); - } - - public void reset() { - numBytes = 0; - nonLeaves.clear(); - leaves.clear(); - numLeafWriters = 0; - numNonLeafWriters = 0; - } - - - /****************** methods *******************************/ - - public RowSectionWriter compile() { - blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength()); - prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes(); - - prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false); - prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true); - - // track the starting position of each node in final output - int negativeIndex = 0; - - // create leaf writer nodes - // leaf widths are known at this point, so add them up - int totalLeafBytes = 0; - for (int i = leaves.size() - 1; i >= 0; --i) { - TokenizerNode leaf = leaves.get(i); - RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf); - ++numLeafWriters; - // leaves store all but their first token byte - int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0); - totalLeafBytes += leafNodeWidth; - negativeIndex += leafNodeWidth; - leaf.setNegativeIndex(negativeIndex); - } - - int totalNonLeafBytesWithoutOffsets = 0; - int totalChildPointers = 0; - for (int i = nonLeaves.size() - 1; i >= 0; --i) { - TokenizerNode nonLeaf = nonLeaves.get(i); - RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf); - ++numNonLeafWriters; - totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0); - totalChildPointers += nonLeaf.getNumChildren(); - } - - // figure out how wide our offset FInts are - int offsetWidth = 0; - while (true) { - ++offsetWidth; - int offsetBytes = totalChildPointers * offsetWidth; - int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes; - if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) { - // it fits - numBytes = totalRowBytes; - break; - } - } - blockMeta.setNextNodeOffsetWidth(offsetWidth); - - // populate negativeIndexes - for (int i = nonLeaves.size() - 1; i >= 0; --i) { - TokenizerNode nonLeaf = nonLeaves.get(i); - int writerIndex = nonLeaves.size() - i - 1; - RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex); - int nodeWidth = nonLeafWriter.calculateWidth(); - negativeIndex += nodeWidth; - nonLeaf.setNegativeIndex(negativeIndex); - } - - return this; - } 
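Both `compile()` methods above solve the same chicken-and-egg problem: the section size depends on the offset width, and the offset width depends on the section size. A stand-alone sketch of that fitting loop, using a simple power-of-two bound in place of `UFIntTool.maxValueForNumBytes`:

```java
// Sketch: try 1, 2, 3, ... byte offsets until the resulting section length fits in the
// unsigned range representable by that many bytes.
static int fitOffsetWidth(int bytesWithoutOffsets, int numOffsets) {
  int width = 0;
  while (true) {
    ++width;
    long totalBytes = bytesWithoutOffsets + (long) numOffsets * width;
    if (totalBytes < (1L << (8 * width)) - 1) {
      return width;                                   // it fits
    }
  }
}
```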
- - protected RowNodeWriter initializeWriter(List list, int index, - TokenizerNode builderNode) { - RowNodeWriter rowNodeWriter = null; - //check if there is an existing node we can recycle - if (index >= list.size()) { - //there are not enough existing nodes, so add a new one which will be retrieved below - list.add(new RowNodeWriter(prefixTreeEncoder, builderNode)); - } - rowNodeWriter = list.get(index); - rowNodeWriter.reset(builderNode); - return rowNodeWriter; - } - - - public void writeBytes(OutputStream os) throws IOException { - for (int i = numNonLeafWriters - 1; i >= 0; --i) { - RowNodeWriter nonLeafWriter = nonLeafWriters.get(i); - nonLeafWriter.write(os); - } - // duplicates above... written more for clarity right now - for (int i = numLeafWriters - 1; i >= 0; --i) { - RowNodeWriter leafWriter = leafWriters.get(i); - leafWriter.write(os); - } - } - - - /***************** static ******************************/ - - protected static ArrayList filterByLeafAndReverse( - ArrayList ins, boolean leaves) { - ArrayList outs = Lists.newArrayList(); - for (int i = ins.size() - 1; i >= 0; --i) { - TokenizerNode n = ins.get(i); - if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) { - outs.add(ins.get(i)); - } - } - return outs; - } - - - /************* get/set **************************/ - - public int getNumBytes() { - return numBytes; - } - - public ArrayList getNonLeaves() { - return nonLeaves; - } - - public ArrayList getLeaves() { - return leaves; - } - - public ArrayList getNonLeafWriters() { - return nonLeafWriters; - } - - public ArrayList getLeafWriters() { - return leafWriters; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java deleted file mode 100644 index e1082e0..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.tokenize; - -import java.util.Comparator; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Determines order of nodes in the output array. Maybe possible to optimize further. 
- */ -@InterfaceAudience.Private -public class TokenDepthComparator implements Comparator { - - @Override - public int compare(TokenizerNode a, TokenizerNode b) { - if(a==null){ - throw new IllegalArgumentException("a cannot be null"); - } - if(b==null){ - throw new IllegalArgumentException("b cannot be null"); - } - - // put leaves at the end - if (!a.isLeaf() && b.isLeaf()) { - return -1; - } - if (a.isLeaf() && !b.isLeaf()) { - return 1; - } - - if (a.isLeaf() && b.isLeaf()) {// keep leaves in sorted order (for debugability) - return a.getId() < b.getId() ? -1 : 1; - } - - // compare depth - if (a.getTokenOffset() < b.getTokenOffset()) { - return -1; - } - if (a.getTokenOffset() > b.getTokenOffset()) { - return 1; - } - - // if same depth, return lower id first. ids are unique - return a.getId() < b.getId() ? -1 : 1; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java deleted file mode 100644 index 9b43c47..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.tokenize; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ArrayUtils; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.CollectionUtils; - -import com.google.common.collect.Lists; - -/** - * Data structure used in the first stage of PrefixTree encoding: - *

  • accepts a sorted stream of ByteRanges - *
  • splits them into a set of tokens, each held by a {@link TokenizerNode} - *
  • connects the TokenizerNodes via standard Java references - *
  • keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content - *
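The splitting step above hinges on how much of each new, sorted element is already present on the current trie path. A toy illustration of that shared-prefix computation (not the Tokenizer's actual node-by-node algorithm):

```java
// Sketch: length of the prefix shared by two byte[]s; on sorted input this tells the
// tokenizer where an existing token must be split or a new child started.
static int commonPrefixLength(byte[] a, byte[] b) {
  int max = Math.min(a.length, b.length);
  int i = 0;
  while (i < max && a[i] == b[i]) {
    ++i;
  }
  return i;
}
// "rowA1", "rowA2", "rowB" share "rowA"/"row" prefixes, yielding a "row" node with children.
```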


    - * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier - * encoding. - */ -@InterfaceAudience.Private -public class Tokenizer{ - - /***************** fields **************************/ - - protected int numArraysAdded = 0; - protected long lastNodeId = -1; - protected ArrayList nodes; - protected int numNodes; - protected TokenizerNode root; - protected byte[] tokens; - protected int tokensLength; - - protected int maxElementLength = 0; - // number of levels in the tree assuming root level is 0 - protected int treeDepth = 0; - - - /******************* construct *******************/ - - public Tokenizer() { - this.nodes = Lists.newArrayList(); - this.tokens = new byte[0]; - } - - public void reset() { - numArraysAdded = 0; - lastNodeId = -1; - numNodes = 0; - tokensLength = 0; - root = null; - maxElementLength = 0; - treeDepth = 0; - } - - - /***************** building *************************/ - - public void addAll(ArrayList sortedByteRanges) { - for (int i = 0; i < sortedByteRanges.size(); ++i) { - ByteRange byteRange = sortedByteRanges.get(i); - addSorted(byteRange); - } - } - - public void addSorted(final ByteRange bytes) { - ++numArraysAdded; - if (bytes.getLength() > maxElementLength) { - maxElementLength = bytes.getLength(); - } - if (root == null) { - // nodeDepth of firstNode (non-root) is 1 - root = addNode(null, 1, 0, bytes, 0); - } else { - root.addSorted(bytes); - } - } - - public void incrementNumOccurrencesOfLatestValue(){ - CollectionUtils.getLast(nodes).incrementNumOccurrences(1); - } - - protected long nextNodeId() { - return ++lastNodeId; - } - - protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset, - final ByteRange token, int inputTokenOffset) { - int inputTokenLength = token.getLength() - inputTokenOffset; - int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset); - TokenizerNode node = null; - if (nodes.size() <= numNodes) { - node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset, - inputTokenLength); - nodes.add(node); - } else { - node = nodes.get(numNodes); - node.reset(); - node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength); - } - ++numNodes; - return node; - } - - protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) { - int newOffset = tokensLength; - int inputTokenLength = token.getLength() - inputTokenOffset; - int newMinimum = tokensLength + inputTokenLength; - tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum); - token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength); - tokensLength += inputTokenLength; - return newOffset; - } - - protected void submitMaxNodeDepthCandidate(int nodeDepth) { - if (nodeDepth > treeDepth) { - treeDepth = nodeDepth; - } - } - - - /********************* read ********************/ - - public int getNumAdded(){ - return numArraysAdded; - } - - // for debugging - public ArrayList getNodes(boolean includeNonLeaves, boolean includeLeaves) { - ArrayList nodes = Lists.newArrayList(); - root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves); - return nodes; - } - - public void appendNodes(List appendTo, boolean includeNonLeaves, - boolean includeLeaves) { - root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves); - } - - public List getArrays() { - List nodes = new ArrayList(); - root.appendNodesToExternalList(nodes, true, true); - List byteArrays = 
Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes)); - for (int i = 0; i < nodes.size(); ++i) { - TokenizerNode node = nodes.get(i); - for (int j = 0; j < node.getNumOccurrences(); ++j) { - byte[] byteArray = node.getNewByteArray(); - byteArrays.add(byteArray); - } - } - return byteArrays; - } - - //currently unused, but working and possibly useful in the future - public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, - int keyLength) { - root.getNode(resultHolder, key, keyOffset, keyLength); - } - - - /********************** write ***************************/ - - public Tokenizer setNodeFirstInsertionIndexes() { - root.setInsertionIndexes(0); - return this; - } - - public Tokenizer appendOutputArrayOffsets(List offsets) { - root.appendOutputArrayOffsets(offsets); - return this; - } - - - /********************* print/debug ********************/ - - protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false; - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(getStructuralString()); - if (INCLUDE_FULL_TREE_IN_TO_STRING) { - for (byte[] bytes : getArrays()) { - if (sb.length() > 0) { - sb.append("\n"); - } - sb.append(Bytes.toString(bytes)); - } - } - return sb.toString(); - } - - public String getStructuralString() { - List nodes = getNodes(true, true); - StringBuilder sb = new StringBuilder(); - for (TokenizerNode node : nodes) { - String line = node.getPaddedTokenAndOccurrenceString(); - sb.append(line + "\n"); - } - return sb.toString(); - } - - - /****************** get/set ************************/ - - public TokenizerNode getRoot() { - return root; - } - - public int getMaxElementLength() { - return maxElementLength; - } - - public int getTreeDepth() { - return treeDepth; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java deleted file mode 100644 index 2b8a86c..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.tokenize; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hadoop.hbase.util.Strings; - -import com.google.common.collect.Lists; - -/** - * Individual node in a Trie structure. Each node is one of 3 types: - *

- * <ul>
- * <li>Branch: an internal trie node that may have a token and must have multiple children, but does
- * not represent an actual input byte[], hence its numOccurrences is 0
- * <li>Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the
- * last bytes in the input byte[]s.
- * <li>Nub: a combination of a branch and leaf. Its token represents the last bytes of input
- * byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s
- * that add bytes to this node's input byte[].
- * </ul>
- *
- * Example inputs (numInputs=7):
- *   0: AAA
- *   1: AAA
- *   2: AAB
- *   3: AAB
- *   4: AAB
- *   5: AABQQ
- *   6: AABQQ
- *
- * Resulting TokenizerNodes:
- *   AA <- branch, numOccurrences=0, tokenStartOffset=0, token.length=2
- *   A  <- leaf,   numOccurrences=2, tokenStartOffset=2, token.length=1
- *   B  <- nub,    numOccurrences=3, tokenStartOffset=2, token.length=1
- *   QQ <- leaf,   numOccurrences=2, tokenStartOffset=3, token.length=2
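For orientation, here is a minimal sketch (not part of this patch) of driving the deleted Tokenizer class to reproduce the layout above. It assumes a ByteRange(byte[]) constructor and the Bytes utility from org.apache.hadoop.hbase.util, and that inputs arrive pre-sorted, as addSorted() requires:

    Tokenizer tokenizer = new Tokenizer();
    for (String row : new String[] { "AAA", "AAA", "AAB", "AAB", "AAB", "AABQQ", "AABQQ" }) {
      tokenizer.addSorted(new ByteRange(Bytes.toBytes(row))); // must be fed in sorted order
    }
    // Expected shape per the javadoc: branch "AA", leaf "A" (x2), nub "B" (x3), leaf "QQ" (x2)
    System.out.println(tokenizer.getStructuralString());
    System.out.println("treeDepth=" + tokenizer.getTreeDepth()); // root level is 0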

    - * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2 - */ -@InterfaceAudience.Private -public class TokenizerNode{ - - /* - * Ref to data structure wrapper - */ - protected Tokenizer builder; - - /****************************************************************** - * Tree content/structure used during tokenization - * ****************************************************************/ - - /* - * ref to parent trie node - */ - protected TokenizerNode parent; - - /* - * node depth in trie, irrespective of each node's token length - */ - protected int nodeDepth; - - /* - * start index of this token in original byte[] - */ - protected int tokenStartOffset; - - /* - * bytes for this trie node. can be length 0 in root node - */ - protected ByteRange token; - - /* - * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for - * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing - * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode. - */ - protected int numOccurrences; - - /* - * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256 - * child nodes. - */ - protected ArrayList children; - - - /* - * Fields used later in the encoding process for sorting the nodes into the order they'll be - * written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer - * are not generic data structures but instead are specific to HBase PrefixTree encoding. - */ - - /* - * unique id assigned to each TokenizerNode - */ - protected long id; - - /* - * set >=0 for nubs and leaves - */ - protected int firstInsertionIndex = -1; - - /* - * A positive value indicating how many bytes before the end of the block this node will start. If - * the section is 55 bytes and negativeOffset is 9, then the node will start at 46. - */ - protected int negativeIndex = 0; - - /* - * The offset in the output array at which to start writing this node's token bytes. Influenced - * by the lengths of all tokens sorted before this one. - */ - protected int outputArrayOffset = -1; - - - /*********************** construct *****************************/ - - public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth, - int tokenStartOffset, int tokenOffset, int tokenLength) { - this.token = new ByteRange(); - reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength); - this.children = Lists.newArrayList(); - } - - /* - * Sub-constructor for initializing all fields without allocating a new object. Used by the - * regular constructor. - */ - public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth, - int tokenStartOffset, int tokenOffset, int tokenLength) { - this.builder = builder; - this.id = builder.nextNodeId(); - this.parent = parent; - this.nodeDepth = nodeDepth; - builder.submitMaxNodeDepthCandidate(nodeDepth); - this.tokenStartOffset = tokenStartOffset; - this.token.set(builder.tokens, tokenOffset, tokenLength); - this.numOccurrences = 1; - } - - /* - * Clear the state of this node so that it looks like it was just allocated. - */ - public void reset() { - builder = null; - parent = null; - nodeDepth = 0; - tokenStartOffset = 0; - token.clear(); - numOccurrences = 0; - children.clear();// branches & nubs - - // ids/offsets. 
used during writing to byte[] - id = 0; - firstInsertionIndex = -1;// set >=0 for nubs and leaves - negativeIndex = 0; - outputArrayOffset = -1; - } - - - /************************* building *********************************/ - - /* - *
- * <li>Only public method used during the tokenization process
- * <li>Requires that the input ByteRange sort after the previous, and therefore after all previous
- *     inputs
  • Only looks at bytes of the input array that align with this node's token - */ - public void addSorted(final ByteRange bytes) {// recursively build the tree - - /* - * Recurse deeper into the existing trie structure - */ - if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) { - TokenizerNode lastChild = CollectionUtils.getLast(children); - if (lastChild.partiallyMatchesToken(bytes)) { - lastChild.addSorted(bytes); - return; - } - } - - /* - * Recursion ended. We must either - *
- * <li>1: increment numOccurrences if this input was equal to the previous
- * <li>2: convert this node from a leaf to a nub, and add a new child leaf
  • 3: split this node into a branch and leaf, and then add a second leaf - */ - - // add it as a child of this node - int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length - int tailOffset = tokenStartOffset + numIdenticalTokenBytes; - int tailLength = bytes.getLength() - tailOffset; - - if (numIdenticalTokenBytes == token.getLength()) { - if (tailLength == 0) {// identical to this node (case 1) - incrementNumOccurrences(1); - } else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2) - int childNodeDepth = nodeDepth + 1; - int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes; - TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset, - bytes, tailOffset); - addChild(newChildNode); - } - } else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3) - split(numIdenticalTokenBytes, bytes); - } - } - - - protected void addChild(TokenizerNode node) { - node.setParent(this); - children.add(node); - } - - - /** - * Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the - * method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output - * will be 3 nodes:
- * <ul>
- * <li>1: B <- branch
- * <li>2: AA <- leaf
  • 3: OO <- leaf - * - * @param numTokenBytesToRetain => 1 (the B) - * @param bytes => BOO - */ - protected void split(int numTokenBytesToRetain, final ByteRange bytes) { - int childNodeDepth = nodeDepth; - int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain; - - //create leaf AA - TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset, - token, numTokenBytesToRetain); - firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences - token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B - numOccurrences = 0;//current node is now a branch - - moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B) - addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children - - //create leaf OO - TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset, - bytes, tokenStartOffset + numTokenBytesToRetain); - addChild(secondChild);//add the new leaf (00) to the branch's (B's) children - - // we inserted branch node B as a new level above/before the two children, so increment the - // depths of the children below - firstChild.incrementNodeDepthRecursively(); - secondChild.incrementNodeDepthRecursively(); - } - - - protected void incrementNodeDepthRecursively() { - ++nodeDepth; - builder.submitMaxNodeDepthCandidate(nodeDepth); - for (int i = 0; i < children.size(); ++i) { - children.get(i).incrementNodeDepthRecursively(); - } - } - - - protected void moveChildrenToDifferentParent(TokenizerNode newParent) { - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - child.setParent(newParent); - newParent.children.add(child); - } - children.clear(); - } - - - /************************ byte[] utils *************************/ - - protected boolean partiallyMatchesToken(ByteRange bytes) { - return numIdenticalBytes(bytes) > 0; - } - - protected boolean matchesToken(ByteRange bytes) { - return numIdenticalBytes(bytes) == getTokenLength(); - } - - protected int numIdenticalBytes(ByteRange bytes) { - return token.numEqualPrefixBytes(bytes, tokenStartOffset); - } - - - /***************** moving nodes around ************************/ - - public void appendNodesToExternalList(List appendTo, boolean includeNonLeaves, - boolean includeLeaves) { - if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) { - appendTo.add(this); - } - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves); - } - } - - public int setInsertionIndexes(int nextIndex) { - int newNextIndex = nextIndex; - if (hasOccurrences()) { - setFirstInsertionIndex(nextIndex); - newNextIndex += numOccurrences; - } - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - newNextIndex = child.setInsertionIndexes(newNextIndex); - } - return newNextIndex; - } - - public void appendOutputArrayOffsets(List offsets) { - if (hasOccurrences()) { - offsets.add(outputArrayOffset); - } - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - child.appendOutputArrayOffsets(offsets); - } - } - - - /***************** searching *********************************/ - - /* - * Do a trie style search through the tokenizer. One option for looking up families or qualifiers - * during encoding, but currently unused in favor of tracking this information as they are added. 
- * - * Keeping code pending further performance testing. - */ - public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, - int keyLength) { - int thisNodeDepthPlusLength = tokenStartOffset + token.getLength(); - - // quick check if the key is shorter than this node (may not work for binary search) - if (CollectionUtils.isEmpty(children)) { - if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes - resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); - return; - } - } - - // all token bytes must match - for (int i = 0; i < token.getLength(); ++i) { - if (key[tokenStartOffset + keyOffset + i] != token.get(i)) { - // TODO return whether it's before or after so we can binary search - resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); - return; - } - } - - if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) { - resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH - return; - } - - if (CollectionUtils.notEmpty(children)) { - // TODO binary search the children - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - child.getNode(resultHolder, key, keyOffset, keyLength); - if (resultHolder.isMatch()) { - return; - } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) { - // passed it, so it doesn't exist - resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); - return; - } - // key is still AFTER the current node, so continue searching - } - } - - // checked all children (or there were no children), and didn't find it - resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); - return; - } - - - /****************** writing back to byte[]'s *************************/ - - public byte[] getNewByteArray() { - byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()]; - fillInBytes(arrayToFill); - return arrayToFill; - } - - public void fillInBytes(byte[] arrayToFill) { - for (int i = 0; i < token.getLength(); ++i) { - arrayToFill[tokenStartOffset + i] = token.get(i); - } - if (parent != null) { - parent.fillInBytes(arrayToFill); - } - } - - - /************************** printing ***********************/ - - @Override - public String toString() { - String s = ""; - if (parent == null) { - s += "R "; - } else { - s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray()); - } - s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]"; - if (numOccurrences > 0) { - s += "x" + numOccurrences; - } - return s; - } - - public String getPaddedTokenAndOccurrenceString() { - StringBuilder sb = new StringBuilder(); - sb.append(getBnlIndicator(true)); - sb.append(Strings.padFront(numOccurrences + "", ' ', 3)); - sb.append(Strings.padFront(nodeDepth + "", ' ', 3)); - if (outputArrayOffset >= 0) { - sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3)); - } - sb.append(" "); - for (int i = 0; i < tokenStartOffset; ++i) { - sb.append(" "); - } - sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_")); - return sb.toString(); - } - - public String getBnlIndicator(boolean indent) { - if (indent) { - if (isNub()) { - return " N "; - } - return isBranch() ? "B " : " L"; - } - if (isNub()) { - return "N"; - } - return isBranch() ? "B" : "L"; - } - - - /********************** count different node types ********************/ - - public int getNumBranchNodesIncludingThisNode() { - if (isLeaf()) { - return 0; - } - int totalFromThisPlusChildren = isBranch() ? 
1 : 0; - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode(); - } - return totalFromThisPlusChildren; - } - - public int getNumNubNodesIncludingThisNode() { - if (isLeaf()) { - return 0; - } - int totalFromThisPlusChildren = isNub() ? 1 : 0; - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode(); - } - return totalFromThisPlusChildren; - } - - public int getNumLeafNodesIncludingThisNode() { - if (isLeaf()) { - return 1; - } - int totalFromChildren = 0; - for (int i = 0; i < children.size(); ++i) { - TokenizerNode child = children.get(i); - totalFromChildren += child.getNumLeafNodesIncludingThisNode(); - } - return totalFromChildren; - } - - - /*********************** simple read-only methods *******************************/ - - public int getNodeDepth() { - return nodeDepth; - } - - public int getTokenLength() { - return token.getLength(); - } - - public boolean hasOccurrences() { - return numOccurrences > 0; - } - - public boolean isRoot() { - return this.parent == null; - } - - public int getNumChildren() { - return CollectionUtils.nullSafeSize(children); - } - - public TokenizerNode getLastChild() { - if (CollectionUtils.isEmpty(children)) { - return null; - } - return CollectionUtils.getLast(children); - } - - public boolean isLeaf() { - return CollectionUtils.isEmpty(children) && hasOccurrences(); - } - - public boolean isBranch() { - return CollectionUtils.notEmpty(children) && !hasOccurrences(); - } - - public boolean isNub() { - return CollectionUtils.notEmpty(children) && hasOccurrences(); - } - - - /********************** simple mutation methods *************************/ - - /** - * Each occurrence > 1 indicates a repeat of the previous entry. This can be called directly by - * an external class without going through the process of detecting a repeat if it is a known - * repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows - * the new cells are part of the current row. 
- * @param d increment by this amount - */ - public void incrementNumOccurrences(int d) { - numOccurrences += d; - } - - - /************************* autogenerated get/set ******************/ - - public int getTokenOffset() { - return tokenStartOffset; - } - - public TokenizerNode getParent() { - return parent; - } - - public ByteRange getToken() { - return token; - } - - public int getNumOccurrences() { - return numOccurrences; - } - - public void setParent(TokenizerNode parent) { - this.parent = parent; - } - - public void setNumOccurrences(int numOccurrences) { - this.numOccurrences = numOccurrences; - } - - public ArrayList getChildren() { - return children; - } - - public long getId() { - return id; - } - - public int getFirstInsertionIndex() { - return firstInsertionIndex; - } - - public void setFirstInsertionIndex(int firstInsertionIndex) { - this.firstInsertionIndex = firstInsertionIndex; - } - - public int getNegativeIndex() { - return negativeIndex; - } - - public void setNegativeIndex(int negativeIndex) { - this.negativeIndex = negativeIndex; - } - - public int getOutputArrayOffset() { - return outputArrayOffset; - } - - public void setOutputArrayOffset(int outputArrayOffset) { - this.outputArrayOffset = outputArrayOffset; - } - - public void setId(long id) { - this.id = id; - } - - public void setBuilder(Tokenizer builder) { - this.builder = builder; - } - - public void setTokenOffset(int tokenOffset) { - this.tokenStartOffset = tokenOffset; - } - - public void setToken(ByteRange token) { - this.token = token; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java deleted file mode 100644 index 6494ba1..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.tokenize; - -import org.apache.hadoop.classification.InterfaceAudience; - - -/** - * Warning: currently unused, but code is valid. Pending performance testing on more data sets. - * - * Where is the key relative to our current position in the tree. 
For example, the current tree node - * is "BEFORE" the key we are seeking - */ -@InterfaceAudience.Private -public enum TokenizerRowSearchPosition { - - AFTER,//the key is after this tree node, so keep searching - BEFORE,//in a binary search, this tells us to back up - MATCH,//the current node is a full match - NO_MATCH,//might as well return a value more informative than null - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java deleted file mode 100644 index e7f5433..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.encode.tokenize; - -import org.apache.hadoop.classification.InterfaceAudience; - - -/** - * for recursively searching a PtBuilder - */ -@InterfaceAudience.Private -public class TokenizerRowSearchResult{ - - /************ fields ************************/ - - protected TokenizerRowSearchPosition difference; - protected TokenizerNode matchingNode; - - - /*************** construct *****************/ - - public TokenizerRowSearchResult() { - } - - public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) { - this.difference = difference; - } - - public TokenizerRowSearchResult(TokenizerNode matchingNode) { - this.difference = TokenizerRowSearchPosition.MATCH; - this.matchingNode = matchingNode; - } - - - /*************** methods **********************/ - - public boolean isMatch() { - return TokenizerRowSearchPosition.MATCH == difference; - } - - - /************* get/set ***************************/ - - public TokenizerRowSearchPosition getDifference() { - return difference; - } - - public TokenizerNode getMatchingNode() { - return matchingNode; - } - - public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) { - this.difference = difference; - this.matchingNode = matchingNode; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScannerPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScannerPosition.java deleted file mode 100644 index a8f0541..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScannerPosition.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.scanner; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - * An indicator of the state of the scanner after an operation such as nextCell() or - * positionAt(..). For example: - *
      - *
- * <ul>
- * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
- * it should load the next block.</li>
- * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.</li>
- * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
- * next region.</li>
- * </ul>
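A minimal caller-side sketch (not part of this patch) of how these position codes are meant to be consumed, using the CellSearcher interface removed later in this change; the wrapping method is hypothetical:

    // Hypothetical helper: seek to the first cell at or after seekKey and report whether the
    // searcher still holds a valid cell. Only positionAtOrAfter(..) and the enum constants
    // come from this patch; the rest is illustrative.
    static boolean seekAtOrAfter(CellSearcher searcher, Cell seekKey) {
      CellScannerPosition position = searcher.positionAtOrAfter(seekKey);
      if (position == CellScannerPosition.AT || position == CellScannerPosition.AFTER) {
        return true;  // current cell is valid: exactly seekKey, or the nearest cell after it
      }
      return false;   // AFTER_LAST: this scanner's scope is exhausted; the caller moves to the next block
    }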
    - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public enum CellScannerPosition { - - /** - * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first - * cell. - */ - BEFORE_FIRST, - - /** - * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), - * rather it is the nearest cell before the requested cell. - */ - BEFORE, - - /** - * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by - * positionAt(..). - */ - AT, - - /** - * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), - * rather it is the nearest cell after the requested cell. - */ - AFTER, - - /** - * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect. - */ - AFTER_LAST - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java deleted file mode 100644 index e55c559..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java +++ /dev/null @@ -1,110 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.scanner; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Cell; - -/** - * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that - * the implementation is able to navigate between cells without iterating through every cell. - */ -@InterfaceAudience.Private -public interface CellSearcher extends ReversibleCellScanner { - /** - * Reset any state in the scanner so it appears it was freshly opened. - */ - void resetToBeforeFirstEntry(); - - /** - * Do everything within this scanner's power to find the key. Look forward and backwards. - *

    - * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state. - *

    - * @param key position the CellScanner exactly on this key - * @return true if the cell existed and getCurrentCell() holds a valid cell - */ - boolean positionAt(Cell key); - - /** - * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no - * exact match. - *

    - * @param key position the CellScanner on this key or the closest cell before - * @return AT if exact match
    - * BEFORE if on last cell before key
    - * BEFORE_FIRST if key was before the first cell in this scanner's scope - */ - CellScannerPosition positionAtOrBefore(Cell key); - - /** - * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact - * match. - *

    - * @param key position the CellScanner on this key or the closest cell after - * @return AT if exact match
    - * AFTER if on first cell after key
    - * AFTER_LAST if key was after the last cell in this scanner's scope - */ - CellScannerPosition positionAtOrAfter(Cell key); - - /** - * Note: Added for backwards compatibility with - * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek} - *

    - * Look for the key, but only look after the current position. Probably not needed for an - * efficient tree implementation, but is important for implementations without random access such - * as unencoded KeyValue blocks. - *

    - * @param key position the CellScanner exactly on this key - * @return true if getCurrent() holds a valid cell - */ - boolean seekForwardTo(Cell key); - - /** - * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no - * exact match. - *

    - * @param key - * @return AT if exact match
    - * AFTER if on first cell after key
    - * AFTER_LAST if key was after the last cell in this scanner's scope - */ - CellScannerPosition seekForwardToOrBefore(Cell key); - - /** - * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no - * exact match. - *

    - * @param key - * @return AT if exact match
    - * AFTER if on first cell after key
    - * AFTER_LAST if key was after the last cell in this scanner's scope - */ - CellScannerPosition seekForwardToOrAfter(Cell key); - - /** - * Note: This may not be appropriate to have in the interface. Need to investigate. - *

    - * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST. - * This is used by tests and for handling certain edge cases. - */ - void positionAfterLastCell(); - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java deleted file mode 100644 index b4463d8..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.scanner; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.CellScanner; - -/** - * An extension of CellScanner indicating the scanner supports iterating backwards through cells. - *

    - * Note: This was not added to suggest that HBase should support client facing reverse Scanners, - * but - * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing - * up if the positionAt(..) method goes past the requested cell. - */ -@InterfaceAudience.Private -public interface ReversibleCellScanner extends CellScanner { - - /** - * Try to position the scanner one Cell before the current position. - * @return true if the operation was successful, meaning getCurrentCell() will return a valid - * Cell.
    - * false if there were no previous cells, meaning getCurrentCell() will return null. - * Scanner position will be - * {@link org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST} - */ - boolean previous(); - - /** - * Try to position the scanner in the row before the current row. - * @param endOfRow true for the last cell in the previous row; false for the first cell - * @return true if the operation was successful, meaning getCurrentCell() will return a valid - * Cell.
    - * false if there were no previous cells, meaning getCurrentCell() will return null. - * Scanner position will be - * {@link org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition#BEFORE_FIRST} - */ - boolean previousRow(boolean endOfRow); -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java deleted file mode 100644 index b2d1526..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.byterange; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ArrayUtils; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; - -import com.google.common.collect.Lists; - -/** - * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted - * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage. - *

    - * Current implementations are {@link org.apache.hbase.util.byterange.impl.ByteRangeHashSet} and - * {@link org.apache.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a - * trie-oriented ByteRangeTrieSet, etc - */ -@InterfaceAudience.Private -public abstract class ByteRangeSet { - - /******************** fields **********************/ - - protected byte[] byteAppender; - protected int numBytes; - - protected Map uniqueIndexByUniqueRange; - - protected ArrayList uniqueRanges; - protected int numUniqueRanges = 0; - - protected int[] uniqueRangeIndexByInsertionId; - protected int numInputs; - - protected List sortedIndexByUniqueIndex; - protected int[] sortedIndexByInsertionId; - protected ArrayList sortedRanges; - - - /****************** construct **********************/ - - protected ByteRangeSet() { - this.byteAppender = new byte[0]; - this.uniqueRanges = Lists.newArrayList(); - this.uniqueRangeIndexByInsertionId = new int[0]; - this.sortedIndexByUniqueIndex = Lists.newArrayList(); - this.sortedIndexByInsertionId = new int[0]; - this.sortedRanges = Lists.newArrayList(); - } - - public void reset() { - numBytes = 0; - uniqueIndexByUniqueRange.clear(); - numUniqueRanges = 0; - numInputs = 0; - sortedIndexByUniqueIndex.clear(); - sortedRanges.clear(); - } - - - /*************** abstract *************************/ - - public abstract void addToSortedRanges(); - - - /**************** methods *************************/ - - /** - * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and - * insert it into the tracking Map uniqueIndexByUniqueRange. - */ - public void add(ByteRange bytes) { - Integer index = uniqueIndexByUniqueRange.get(bytes); - if (index == null) { - index = store(bytes); - } - int minLength = numInputs + 1; - uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId, - minLength, 2 * minLength); - uniqueRangeIndexByInsertionId[numInputs] = index; - ++numInputs; - } - - protected int store(ByteRange bytes) { - int indexOfNewElement = numUniqueRanges; - if (uniqueRanges.size() <= numUniqueRanges) { - uniqueRanges.add(new ByteRange()); - } - ByteRange storedRange = uniqueRanges.get(numUniqueRanges); - int neededBytes = numBytes + bytes.getLength(); - byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes); - bytes.deepCopyTo(byteAppender, numBytes); - storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet - numBytes += bytes.getLength(); - uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement); - int newestUniqueIndex = numUniqueRanges; - ++numUniqueRanges; - return newestUniqueIndex; - } - - public ByteRangeSet compile() { - addToSortedRanges(); - for (int i = 0; i < sortedRanges.size(); ++i) { - sortedIndexByUniqueIndex.add(null);// need to grow the size - } - // TODO move this to an invert(int[]) util method - for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) { - int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i)); - sortedIndexByUniqueIndex.set(uniqueIndex, i); - } - sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs, - numInputs); - for (int i = 0; i < numInputs; ++i) { - int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i]; - int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex); - sortedIndexByInsertionId[i] = sortedIndex; - } - return this; - } - - public int getSortedIndexForInsertionId(int insertionId) { - return 
sortedIndexByInsertionId[insertionId]; - } - - public int size() { - return uniqueIndexByUniqueRange.size(); - } - - - /***************** standard methods ************************/ - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - int i = 0; - for (ByteRange r : sortedRanges) { - if (i > 0) { - sb.append("\n"); - } - sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray())); - ++i; - } - sb.append("\ntotalSize:" + numBytes); - sb.append("\navgSize:" + getAvgSize()); - return sb.toString(); - } - - - /**************** get/set *****************************/ - - public ArrayList getSortedRanges() { - return sortedRanges; - } - - public long getAvgSize() { - return numBytes / numUniqueRanges; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java deleted file mode 100644 index 8787f39..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.byterange.impl; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hadoop.hbase.util.IterableUtils; -import org.apache.hbase.util.byterange.ByteRangeSet; - -/** - * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces - * garbage when adding a new element to it. We can probably create a tighter implementation without - * pointers or garbage. 
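As a usage note, a minimal sketch (not part of this patch) of feeding unsorted, duplicated qualifiers through the ByteRangeHashSet described here; it assumes a ByteRange(byte[]) constructor and Bytes.toBytes(..):

    ByteRangeSet qualifiers = new ByteRangeHashSet();
    for (String q : new String[] { "qualB", "qualA", "qualB", "qualC" }) {
      qualifiers.add(new ByteRange(Bytes.toBytes(q))); // duplicates are de-duped; bytes stored once
    }
    qualifiers.compile();                              // sorts the unique ranges
    int numUnique = qualifiers.size();                 // 3 unique qualifiers
    int sortedIndexOfFirstInput = qualifiers.getSortedIndexForInsertionId(0); // "qualB" -> 1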
- */ -@InterfaceAudience.Private -public class ByteRangeHashSet extends ByteRangeSet { - - /************************ constructors *****************************/ - - public ByteRangeHashSet() { - this.uniqueIndexByUniqueRange = new HashMap(); - } - - public ByteRangeHashSet(List rawByteArrays) { - for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) { - add(in); - } - } - - @Override - public void addToSortedRanges() { - sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet())); - Collections.sort(sortedRanges); - } - -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java deleted file mode 100644 index 9499e56..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.byterange.impl; - -import java.util.List; -import java.util.TreeMap; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hadoop.hbase.util.IterableUtils; -import org.apache.hbase.util.byterange.ByteRangeSet; - -/** - * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet. - */ -@InterfaceAudience.Private -public class ByteRangeTreeSet extends ByteRangeSet { - - /************************ constructors *****************************/ - - public ByteRangeTreeSet() { - this.uniqueIndexByUniqueRange = new TreeMap(); - } - - public ByteRangeTreeSet(List rawByteArrays) { - this();//needed to initialize the TreeSet - for(ByteRange in : IterableUtils.nullSafe(rawByteArrays)){ - add(in); - } - } - - @Override - public void addToSortedRanges() { - sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet())); - } - -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java deleted file mode 100644 index 278ac55..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.vint; - -import java.io.IOException; -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * UFInt is an abbreviation for Unsigned Fixed-width Integer. - * - * This class converts between positive ints and 1-4 bytes that represent the int. All input ints - * must be positive. Max values stored in N bytes are: - * - * N=1: 2^8 => 256 - * N=2: 2^16 => 65,536 - * N=3: 2^24 => 16,777,216 - * N=4: 2^31 => 2,147,483,648 (Integer.MAX_VALUE) - * - * This was created to get most of the memory savings of a variable length integer when encoding - * an array of input integers, but to fix the number of bytes for each integer to the number needed - * to store the maximum integer in the array. This enables a binary search to be performed on the - * array of encoded integers. - * - * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if - * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the - * numbers will also require 2 bytes. - * - * warnings: - * * no input validation for max performance - * * no negatives - */ -@InterfaceAudience.Private -public class UFIntTool { - - private static final int NUM_BITS_IN_LONG = 64; - - public static long maxValueForNumBytes(int numBytes) { - return (1L << (numBytes * 8)) - 1; - } - - public static int numBytes(final long value) { - if (value == 0) {// 0 doesn't work with the formula below - return 1; - } - return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8; - } - - public static byte[] getBytes(int outputWidth, final long value) { - byte[] bytes = new byte[outputWidth]; - writeBytes(outputWidth, value, bytes, 0); - return bytes; - } - - public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) { - bytes[offset + outputWidth - 1] = (byte) value; - for (int i = outputWidth - 2; i >= 0; --i) { - bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8); - } - } - - private static final long[] MASKS = new long[] { - (long) 255, - (long) 255 << 8, - (long) 255 << 16, - (long) 255 << 24, - (long) 255 << 32, - (long) 255 << 40, - (long) 255 << 48, - (long) 255 << 56 - }; - - public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException { - for (int i = outputWidth - 1; i >= 0; --i) { - os.write((byte) ((value & MASKS[i]) >>> (8 * i))); - } - } - - public static long fromBytes(final byte[] bytes) { - long value = 0; - value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int - for (int i = 1; i < bytes.length; ++i) { - value <<= 8; - value |= bytes[i] & 0xff; - } - return value; - } - - public static long fromBytes(final byte[] bytes, final int offset, final int width) { - long value = 0; - value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int - for (int i = 1; i < width; ++i) { - value <<= 8; - value |= bytes[i + offset] & 0xff; - } - return value; - } - -} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java 
b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java deleted file mode 100644 index c0d29e4..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.vint; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left - * bit of the current byte is 1, then there is at least one more byte. - */ -@InterfaceAudience.Private -public class UVIntTool { - - public static final byte - BYTE_7_RIGHT_BITS_SET = 127, - BYTE_LEFT_BIT_SET = -128; - - public static final long - INT_7_RIGHT_BITS_SET = 127, - INT_8TH_BIT_SET = 128; - - public static final byte[] - MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 }; - - /********************* int -> bytes **************************/ - - public static int numBytes(int in) { - if (in == 0) { - // doesn't work with the formula below - return 1; - } - return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1) - } - - public static byte[] getBytes(int value) { - int numBytes = numBytes(value); - byte[] bytes = new byte[numBytes]; - int remainder = value; - for (int i = 0; i < numBytes - 1; ++i) { - // set the left bit - bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET); - remainder >>= 7; - } - // do not set the left bit - bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET); - return bytes; - } - - public static int writeBytes(int value, OutputStream os) throws IOException { - int numBytes = numBytes(value); - int remainder = value; - for (int i = 0; i < numBytes - 1; ++i) { - // set the left bit - os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET)); - remainder >>= 7; - } - // do not set the left bit - os.write((byte) (remainder & INT_7_RIGHT_BITS_SET)); - return numBytes; - } - - /******************** bytes -> int **************************/ - - public static int getInt(byte[] bytes) { - return getInt(bytes, 0); - } - - public static int getInt(byte[] bytes, int offset) { - int value = 0; - for (int i = 0;; ++i) { - byte b = bytes[offset + i]; - int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit - shifted <<= 7 * i; - value |= shifted; - if (b >= 0) { - break; - } - } - return value; - } - - public static int getInt(InputStream is) throws IOException { - int value = 0; - int i = 0; - int b; - do{ - b = is.read(); - int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit - shifted <<= 7 * i; - value |= shifted; - ++i; - }while(b > Byte.MAX_VALUE); - return value; - } 
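For comparison with the fixed-width UFIntTool removed above, a short sketch (not part of this patch) that round-trips the value 300 through both encoders; the byte values in the comments follow from the encodings described in the two class javadocs:

    byte[] varint = UVIntTool.getBytes(300);        // { (byte) 0xAC, 0x02 }: 7 data bits per byte, low bits first
    int decodedVarint = UVIntTool.getInt(varint);   // 300

    byte[] fixed = UFIntTool.getBytes(2, 300);      // { 0x01, 0x2C }: always 2 bytes, big-endian, binary-searchable
    long decodedFixed = UFIntTool.fromBytes(fixed); // 300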
-} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java deleted file mode 100644 index ec95ae8..0000000 --- a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.vint; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left - * bit of the current byte is 1, then there is at least one more byte. - */ -@InterfaceAudience.Private -public class UVLongTool{ - - public static final byte - BYTE_7_RIGHT_BITS_SET = 127, - BYTE_LEFT_BIT_SET = -128; - - public static final long - LONG_7_RIGHT_BITS_SET = 127, - LONG_8TH_BIT_SET = 128; - - public static final byte[] - MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 }; - - - /********************* long -> bytes **************************/ - - public static int numBytes(long in) {// do a check for illegal arguments if not protected - if (in == 0) { - return 1; - }// doesn't work with the formula below - return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1) - } - - public static byte[] getBytes(long value) { - int numBytes = numBytes(value); - byte[] bytes = new byte[numBytes]; - long remainder = value; - for (int i = 0; i < numBytes - 1; ++i) { - bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit - remainder >>= 7; - } - bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit - return bytes; - } - - public static int writeBytes(long value, OutputStream os) throws IOException { - int numBytes = numBytes(value); - long remainder = value; - for (int i = 0; i < numBytes - 1; ++i) { - // set the left bit - os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET)); - remainder >>= 7; - } - // do not set the left bit - os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET)); - return numBytes; - } - - /******************** bytes -> long **************************/ - - public static long getLong(byte[] bytes) { - return getLong(bytes, 0); - } - - public static long getLong(byte[] bytes, int offset) { - long value = 0; - for (int i = 0;; ++i) { - byte b = bytes[offset + i]; - long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit - shifted <<= 7 * i; - value |= shifted; - if (b >= 0) { - break; - }// first bit was 0, so that's the last byte in the VarLong - } - return value; - } - - public static long getLong(InputStream is) throws 
IOException { - long value = 0; - int i = 0; - int b; - do { - b = is.read(); - long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit - shifted <<= 7 * i; - value |= shifted; - ++i; - } while (b > Byte.MAX_VALUE); - return value; - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java new file mode 100644 index 0000000..5bc4186 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.keyvalue; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.codec.prefixtree.row.TestRowData; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestKeyValueTool { + + @Parameters + public static Collection parameters() { + return new TestRowData.InMemory().getAllAsObjectArray(); + } + + private TestRowData rows; + + public TestKeyValueTool(TestRowData testRows) { + this.rows = testRows; + } + + @Test + public void testRoundTripToBytes() { + List kvs = rows.getInputs(); + ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false); + List roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false); + Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray()); + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeTestConstants.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeTestConstants.java new file mode 100644 index 0000000..aabc0f0 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeTestConstants.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree; + +import org.apache.hadoop.hbase.util.Bytes; + +public class PrefixTreeTestConstants { + + public static final byte[] TEST_CF = Bytes.toBytes("cfDefault"); + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java new file mode 100644 index 0000000..57f6630 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.blockmeta; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.junit.Assert; +import org.junit.Test; + +public class TestBlockMeta { + + static int BLOCK_START = 123; + + private static PrefixTreeBlockMeta createSample() { + PrefixTreeBlockMeta m = new PrefixTreeBlockMeta(); + m.setNumMetaBytes(0); + m.setNumKeyValueBytes(3195); + + m.setNumRowBytes(0); + m.setNumFamilyBytes(3); + m.setNumQualifierBytes(12345); + m.setNumTimestampBytes(23456); + m.setNumMvccVersionBytes(5); + m.setNumValueBytes(34567); + + m.setNextNodeOffsetWidth(3); + m.setFamilyOffsetWidth(1); + m.setQualifierOffsetWidth(2); + m.setTimestampIndexWidth(1); + m.setMvccVersionIndexWidth(2); + m.setValueOffsetWidth(8); + m.setValueLengthWidth(3); + + m.setRowTreeDepth(11); + m.setMaxRowLength(200); + m.setMaxQualifierLength(50); + + m.setMinTimestamp(1318966363481L); + m.setTimestampDeltaWidth(3); + m.setMinMvccVersion(100L); + m.setMvccVersionDeltaWidth(4); + + m.setAllSameType(false); + m.setAllTypes(KeyValue.Type.Delete.getCode()); + + m.setNumUniqueRows(88); + m.setNumUniqueFamilies(1); + m.setNumUniqueQualifiers(56); + return m; + } + + @Test + public void testStreamSerialization() throws IOException { + PrefixTreeBlockMeta original = createSample(); + ByteArrayOutputStream os = new ByteArrayOutputStream(10000); + original.writeVariableBytesToOutputStream(os); + ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray()); + PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(buffer); + Assert.assertTrue(original.equals(roundTripped)); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTokenizer.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTokenizer.java new file mode 100644 index 0000000..12bd404 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTokenizer.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.builder; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestTokenizer { + + @Parameters + public static Collection parameters() { + return new TestTokenizerData.InMemory().getAllAsObjectArray(); + } + + private List inputs; + private Tokenizer builder; + private List roundTripped; + + public TestTokenizer(TestTokenizerData sortedByteArrays) { + this.inputs = sortedByteArrays.getInputs(); + this.builder = new Tokenizer(); + for (byte[] array : inputs) { + builder.addSorted(new ByteRange(array)); + } + this.roundTripped = builder.getArrays(); + } + + @Test + public void testReaderRoundTrip() { + Assert.assertEquals(inputs.size(), roundTripped.size()); + Assert.assertTrue(Bytes.isSorted(roundTripped)); + Assert.assertTrue(Bytes.equals(inputs, roundTripped)); + } + + @Test + public void testSearching() { + for (byte[] input : inputs) { + TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult(); + builder.getNode(resultHolder, input, 0, input.length); + TokenizerNode n = resultHolder.getMatchingNode(); + byte[] output = n.getNewByteArray(); + Assert.assertTrue(Bytes.equals(input, output)); + } + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTokenizerData.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTokenizerData.java new file mode 100644 index 0000000..9576bb5 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTokenizerData.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.builder; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic; +import org.apache.hadoop.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase; + +import com.google.common.collect.Lists; + +public interface TestTokenizerData { + + List getInputs(); + List getOutputs(); + + public static class InMemory { + public Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + all.add(new Object[] { new TestTokenizerDataBasic() }); + all.add(new Object[] { new TestTokenizerDataEdgeCase() }); + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTreeDepth.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTreeDepth.java new file mode 100644 index 0000000..d0a47b9 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/TestTreeDepth.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.builder; + +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; +import org.junit.Test; +import org.mortbay.log.Log; + +import com.google.common.collect.Lists; + +public class TestTreeDepth { + + @Test + public void testSingleNode() { + List inputs = Lists.newArrayList("a"); + testInternal(inputs, 1); + } + + @Test + public void testSimpleBranch() { + List inputs = Lists.newArrayList("a", "aa", "ab"); + testInternal(inputs, 2); + } + + @Test + public void testEmptyRoot() { + List inputs = Lists.newArrayList("a", "b"); + testInternal(inputs, 2); + } + + @Test + public void testRootAsNub() { + List inputs = Lists.newArrayList("a", "aa"); + testInternal(inputs, 2); + } + + @Test + public void testRootAsNubPlusNub() { + List inputs = Lists.newArrayList("a", "aa", "aaa"); + testInternal(inputs, 3); + } + + @Test + public void testEmptyRootPlusNub() { + List inputs = Lists.newArrayList("a", "aa", "b"); + testInternal(inputs, 3); + } + + @Test + public void testSplitDistantAncestor() { + List inputs = Lists.newArrayList("a", "ac", "acd", "b"); + testInternal(inputs, 4); + } + + protected void testInternal(List inputs, int expectedTreeDepth) { + Log.warn("init logger"); + Tokenizer builder = new Tokenizer(); + for (String s : inputs) { + ByteRange b = new ByteRange(Bytes.toBytes(s)); + builder.addSorted(b); + } + Assert.assertEquals(1, builder.getRoot().getNodeDepth()); + Assert.assertEquals(expectedTreeDepth, builder.getTreeDepth()); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java new file mode 100644 index 0000000..f925115 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.builder.data; + +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.builder.TestTokenizerData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestTokenizerDataBasic implements TestTokenizerData { + + static List d = Lists.newArrayList(); + static { + List s = Lists.newArrayList(); + s.add("abc");// nub + s.add("abcde");// leaf + s.add("bbc");// causes root to split and have empty token + s.add("bbc");// makes numOccurrences=2 on the bbc node + s.add("cd");// just to get another node after the numOccurrences=2 + d = Bytes.getUtf8ByteArrays(s); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public List getOutputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java new file mode 100644 index 0000000..87457d0 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.builder.data; + +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.builder.TestTokenizerData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestTokenizerDataEdgeCase implements TestTokenizerData { + + static List d = Lists.newArrayList(); + static { + /* + * tricky little combination because the acegi token will partially match abdfi, but when you + * descend into abdfi, it will not fully match + */ + List s = Lists.newArrayList(); + s.add("abdfh"); + s.add("abdfi"); + s.add("acegi"); + d = Bytes.getUtf8ByteArrays(s); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public List getOutputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java new file mode 100644 index 0000000..f1d0456 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.column; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader; +import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.google.common.collect.Lists; + +@RunWith(Parameterized.class) +public class TestColumnBuilder { + + @Parameters + public static Collection parameters() { + return new TestColumnData.InMemory().getAllAsObjectArray(); + } + + /*********** fields **********************************/ + + protected TestColumnData columns; + protected ByteRangeTreeSet columnSorter; + protected List sortedUniqueColumns; + protected PrefixTreeBlockMeta blockMeta; + protected Tokenizer builder; + protected ColumnSectionWriter writer; + protected byte[] bytes; + protected byte[] buffer; + protected ColumnReader reader; + + /*************** construct ****************************/ + + public TestColumnBuilder(TestColumnData columns) { + this.columns = columns; + List inputs = columns.getInputs(); + this.columnSorter = new ByteRangeTreeSet(inputs); + this.sortedUniqueColumns = columnSorter.compile().getSortedRanges(); + List copies = ByteRangeTool.copyToNewArrays(sortedUniqueColumns); + Assert.assertTrue(Bytes.isSorted(copies)); + this.blockMeta = new PrefixTreeBlockMeta(); + this.blockMeta.setNumMetaBytes(0); + this.blockMeta.setNumRowBytes(0); + this.builder = new Tokenizer(); + } + + /************* methods ********************************/ + + @Test + public void testReaderRoundTrip() throws IOException { + for (int i = 0; i < sortedUniqueColumns.size(); ++i) { + ByteRange column = sortedUniqueColumns.get(i); + builder.addSorted(column); + } + List builderOutputArrays = builder.getArrays(); + for (int i = 0; i < builderOutputArrays.size(); ++i) { + byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray(); + byte[] outputArray = builderOutputArrays.get(i); + boolean same = Bytes.equals(inputArray, outputArray); + Assert.assertTrue(same); + } + Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size()); + + writer = new ColumnSectionWriter(blockMeta, builder, false); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + 
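// The rest of testReaderRoundTrip() is the read half of the round trip:
//   1. compile the ColumnSectionWriter and stream the encoded column section into baos;
//   2. re-read those bytes with a ColumnReader whose buffer is sized by
//      blockMeta.getMaxQualifierLength();
//   3. for each tokenizer node that holds occurrences, populateBuffer(outputArrayOffset)
//      must reproduce the corresponding sorted, de-duplicated qualifier.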
writer.compile().writeBytes(baos); + bytes = baos.toByteArray(); + buffer = new byte[blockMeta.getMaxQualifierLength()]; + reader = new ColumnReader(buffer, false); + reader.initOnBlock(blockMeta, bytes); + + List builderNodes = Lists.newArrayList(); + builder.appendNodes(builderNodes, true, true); + int i = 0; + for (TokenizerNode builderNode : builderNodes) { + if (!builderNode.hasOccurrences()) { + continue; + } + Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to + // builder + int position = builderNode.getOutputArrayOffset(); + byte[] output = reader.populateBuffer(position).copyBufferToNewArray(); + boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output); + Assert.assertTrue(same); + ++i; + } + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnData.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnData.java new file mode 100644 index 0000000..47773cb --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnData.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.column; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.column.data.TestColumnDataRandom; +import org.apache.hadoop.hbase.codec.prefixtree.column.data.TestColumnDataSimple; +import org.apache.hadoop.hbase.util.ByteRange; + +import com.google.common.collect.Lists; + +public interface TestColumnData { + + List getInputs(); + List getOutputs(); + + public static class InMemory { + public Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + all.add(new Object[] { new TestColumnDataSimple() }); + for (int leftShift = 0; leftShift < 16; ++leftShift) { + all.add(new Object[] { new TestColumnDataRandom(1 << leftShift) }); + } + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java new file mode 100644 index 0000000..f245405 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.column.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.byterange.ByteRangeSet; +import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; + +import com.google.common.collect.Lists; + +public class TestColumnDataRandom implements TestColumnData { + + private List inputs = Lists.newArrayList(); + private List outputs = Lists.newArrayList(); + + public TestColumnDataRandom(int numColumns) { + RedundantKVGenerator generator = new RedundantKVGenerator(); + ByteRangeSet sortedColumns = new ByteRangeTreeSet(); + List d = generator.generateTestKeyValues(numColumns); + for (KeyValue col : d) { + ByteRange colRange = new ByteRange(col.getQualifier()); + inputs.add(colRange); + sortedColumns.add(colRange); + } + for (ByteRange col : sortedColumns.compile().getSortedRanges()) { + outputs.add(col); + } + } + + @Override + public List getInputs() { + return inputs; + } + + @Override + public List getOutputs() { + return outputs; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java new file mode 100644 index 0000000..5921116 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.column.data; + +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.column.TestColumnData; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestColumnDataSimple implements TestColumnData { + + @Override + public List getInputs() { + List d = Lists.newArrayList(); + d.add("abc"); + d.add("abcde"); + d.add("abc"); + d.add("bbc"); + d.add("abc"); + return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); + } + + @Override + public List getOutputs() { + List d = Lists.newArrayList(); + d.add("abc"); + d.add("abcde"); + d.add("bbc"); + return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/BaseTestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/BaseTestRowData.java new file mode 100644 index 0000000..a895f9f --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/BaseTestRowData.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row; + +import java.util.List; + +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +public abstract class BaseTestRowData implements TestRowData { + + @Override + public List getRowStartIndexes() { + List rowStartIndexes = Lists.newArrayList(); + rowStartIndexes.add(0); + List inputs = getInputs(); + for (int i = 1; i < inputs.size(); ++i) { + KeyValue lastKv = inputs.get(i - 1); + KeyValue kv = inputs.get(i); + if (!CellComparator.equalsRow(lastKv, kv)) { + rowStartIndexes.add(i); + } + } + return rowStartIndexes; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java new file mode 100644 index 0000000..02d599b --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory; +import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestPrefixTreeSearcher { + + protected static int BLOCK_START = 7; + + @Parameters + public static Collection parameters() { + return new TestRowData.InMemory().getAllAsObjectArray(); + } + + protected TestRowData rows; + protected ByteBuffer block; + + public TestPrefixTreeSearcher(TestRowData testRows) throws IOException { + this.rows = testRows; + ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20); + PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true); + for (KeyValue kv : rows.getInputs()) { + kvBuilder.write(kv); + } + kvBuilder.flush(); + byte[] outputBytes = os.toByteArray(); + this.block = ByteBuffer.wrap(outputBytes); + } + + + @Test + public void testScanForwards() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + + int i = -1; + while (searcher.advance()) { + ++i; + KeyValue inputCell = rows.getInputs().get(i); + Cell outputCell = searcher.current(); + + // check all 3 permutations of equals() + Assert.assertEquals(inputCell, outputCell); + Assert.assertEquals(outputCell, inputCell); + Assert.assertTrue(CellComparator.equals(inputCell, outputCell)); + } + Assert.assertEquals(rows.getInputs().size(), i + 1); + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Test + public void testScanBackwards() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + searcher.positionAfterLastCell(); + int i = -1; + while (searcher.previous()) { + ++i; + int oppositeIndex = rows.getInputs().size() - i - 1; + KeyValue inputKv = rows.getInputs().get(oppositeIndex); + KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); + Assert.assertEquals(inputKv, outputKv); + } + Assert.assertEquals(rows.getInputs().size(), i + 1); + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Test + public void testRandomSeekHits() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + for (KeyValue kv : rows.getInputs()) { + boolean hit = searcher.positionAt(kv); + Assert.assertTrue(hit); + Cell foundKv = searcher.current(); + Assert.assertTrue(CellComparator.equals(kv, foundKv)); + } + } finally { + DecoderFactory.checkIn(searcher); + } + } + + /** + * very hard to test nubs with this thing since the a nextRowKey function will usually skip them + */ + @Test + public void testRandomSeekMisses() throws IOException { + CellSearcher searcher = null; + List rowStartIndexes = rows.getRowStartIndexes(); + try { + searcher = 
DecoderFactory.checkOut(block, true); + for (int i=0; i < rows.getInputs().size(); ++i) { + KeyValue kv = rows.getInputs().get(i); + + //nextRow + KeyValue inputNextRow = KeyValueUtil.createFirstKeyInNextRow(kv); + + CellScannerPosition position = searcher.positionAtOrBefore(inputNextRow); + boolean isFirstInRow = rowStartIndexes.contains(i); + if(isFirstInRow){ + int rowIndex = rowStartIndexes.indexOf(i); + if(rowIndex < rowStartIndexes.size() - 1){ +// int lastKvInRowI = rowStartIndexes.get(rowIndex + 1) - 1; + Assert.assertEquals(CellScannerPosition.BEFORE, position); + /* + * Can't get this to work between nubs like rowB\x00 <-> rowBB + * + * No reason to doubt that it works, but will have to come up with a smarter test. + */ +// Assert.assertEquals(rows.getInputs().get(lastKvInRowI), searcher.getCurrentCell()); + } + } + + //previous KV + KeyValue inputPreviousKv = KeyValueUtil.previousKey(kv); + boolean hit = searcher.positionAt(inputPreviousKv); + Assert.assertFalse(hit); + position = searcher.positionAtOrAfter(inputPreviousKv); + if(CollectionUtils.isLastIndex(rows.getInputs(), i)){ + Assert.assertTrue(CellScannerPosition.AFTER_LAST == position); + }else{ + Assert.assertTrue(CellScannerPosition.AFTER == position); + /* + * TODO: why i+1 instead of i? + */ + Assert.assertEquals(rows.getInputs().get(i+1), searcher.current()); + } + } + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Test + public void testRandomSeekIndividualAssertions() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + rows.individualSearcherAssertions(searcher); + } finally { + DecoderFactory.checkIn(searcher); + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java new file mode 100644 index 0000000..b0cb43f --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataDeeper; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataEmpty; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNub; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSimple; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivial; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrls; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +/* + * A master class for registering different implementations of TestRowData. + */ +public interface TestRowData { + + List getInputs(); + List getRowStartIndexes(); + + void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta); + + void individualSearcherAssertions(CellSearcher searcher); + + public static class InMemory { + + /* + * The following are different styles of data that the codec may encounter. Having these small + * representations of the data helps pinpoint what is wrong if the encoder breaks. 
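A new fixture would follow the same pattern as the implementations listed just below: extend BaseTestRowData, return a small, pre-sorted List<KeyValue> from getInputs(), and register one instance in InMemory.getAll(). A sketch under those assumptions (the class name, rows, and qualifier are invented for illustration):

import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.common.collect.Lists;

/** Hypothetical fixture; shaped like the other TestRowData implementations in this patch. */
public class TestRowDataTwoRows extends BaseTestRowData {

  static byte[]
      rowA = Bytes.toBytes("rowA"),
      rowB = Bytes.toBytes("rowB"),
      cf = PrefixTreeTestConstants.TEST_CF,
      cq = Bytes.toBytes("cq"),
      v = Bytes.toBytes("v");

  static long ts = 55L;

  static List<KeyValue> d = Lists.newArrayList();
  static {
    // inputs must already be in KeyValue sort order, as in the other fixtures
    d.add(new KeyValue(rowA, cf, cq, ts, v));
    d.add(new KeyValue(rowB, cf, cq, ts, v));
  }

  @Override
  public List<KeyValue> getInputs() {
    return d;
  }
}

Registering it is one more all.add(...) line in getAll(); nothing else changes, because TestRowEncoder and TestPrefixTreeSearcher are parameterized over this collection.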
+ */ + public static Collection getAll() { + List all = Lists.newArrayList(); + //simple + all.add(new TestRowDataEmpty()); + all.add(new TestRowDataTrivial()); + all.add(new TestRowDataSimple()); + all.add(new TestRowDataDeeper()); + + //more specific + all.add(new TestRowDataSingleQualifier()); +// all.add(new TestRowDataMultiFamilies());//multiple families disabled in PrefixTreeEncoder + all.add(new TestRowDataNub()); + all.add(new TestRowDataSearcherRowMiss()); + all.add(new TestRowDataQualifierByteOrdering()); + all.add(new TestRowDataComplexQualifiers()); + all.add(new TestRowDataDifferentTimestamps()); + + //larger data volumes (hard to debug) + all.add(new TestRowDataNumberStrings()); + all.add(new TestRowDataUrls()); + all.add(new TestRowDataUrlsExample()); + all.add(new TestRowDataExerciseFInts()); + all.add(new TestRowDataRandomKeyValues()); + return all; + } + + public static Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + for (TestRowData testRows : getAll()) { + all.add(new Object[] { testRows }); + } + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java new file mode 100644 index 0000000..2bbba8b --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; +import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.google.common.collect.Lists; + +@RunWith(Parameterized.class) +public class TestRowEncoder { + + protected static int BLOCK_START = 7; + + @Parameters + public static Collection parameters() { + List parameters = Lists.newArrayList(); + for (TestRowData testRows : TestRowData.InMemory.getAll()) { + parameters.add(new Object[] { testRows }); + } + return parameters; + } + + protected TestRowData rows; + protected List inputKvs; + protected boolean includeMemstoreTS = true; + protected ByteArrayOutputStream os; + protected PrefixTreeEncoder encoder; + protected int totalBytes; + protected PrefixTreeBlockMeta blockMetaWriter; + protected byte[] outputBytes; + protected ByteBuffer buffer; + protected ByteArrayInputStream is; + protected PrefixTreeBlockMeta blockMetaReader; + protected byte[] inputBytes; + protected PrefixTreeArraySearcher searcher; + + public TestRowEncoder(TestRowData testRows) { + this.rows = testRows; + } + + @Before + public void compile() throws IOException { + os = new ByteArrayOutputStream(1 << 20); + encoder = new PrefixTreeEncoder(os, includeMemstoreTS); + + inputKvs = rows.getInputs(); + for (KeyValue kv : inputKvs) { + encoder.write(kv); + } + encoder.flush(); + totalBytes = encoder.getTotalBytes(); + blockMetaWriter = encoder.getBlockMeta(); + outputBytes = os.toByteArray(); + + // start reading, but save the assertions for @Test methods + buffer = ByteBuffer.wrap(outputBytes); + blockMetaReader = new PrefixTreeBlockMeta(buffer); + + searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(), + blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength()); + searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS); + } + + @Test + public void testEncoderOutput() throws IOException { + Assert.assertEquals(totalBytes, outputBytes.length); + Assert.assertEquals(blockMetaWriter, blockMetaReader); + } + + @Test + public void testForwardScanner() { + int counter = -1; + while (searcher.advance()) { + ++counter; + KeyValue inputKv = rows.getInputs().get(counter); + KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); + assertKeyAndValueEqual(inputKv, outputKv); + } + // assert same number of cells + Assert.assertEquals(rows.getInputs().size(), counter + 1); + } + + + /** + * probably not needed since testReverseScannerWithJitter() below is more thorough + */ + @Test + public void testReverseScanner() { + searcher.positionAfterLastCell(); + int counter = -1; + while (searcher.previous()) { + ++counter; + int oppositeIndex = rows.getInputs().size() - counter - 1; + KeyValue inputKv = rows.getInputs().get(oppositeIndex); + KeyValue outputKv = 
KeyValueUtil.copyToNewKeyValue(searcher.current()); + assertKeyAndValueEqual(inputKv, outputKv); + } + Assert.assertEquals(rows.getInputs().size(), counter + 1); + } + + + /** + * Exercise the nubCellsRemain variable by calling next+previous. NubCellsRemain is basically + * a special fan index. + */ + @Test + public void testReverseScannerWithJitter() { + searcher.positionAfterLastCell(); + int counter = -1; + while (true) { + boolean foundCell = searcher.previous(); + if (!foundCell) { + break; + } + ++counter; + + // a next+previous should cancel out + if (!searcher.isAfterLast()) { + searcher.advance(); + searcher.previous(); + } + + int oppositeIndex = rows.getInputs().size() - counter - 1; + KeyValue inputKv = rows.getInputs().get(oppositeIndex); + KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); + assertKeyAndValueEqual(inputKv, outputKv); + } + Assert.assertEquals(rows.getInputs().size(), counter + 1); + } + + @Test + public void testIndividualBlockMetaAssertions() { + rows.individualBlockMetaAssertions(blockMetaReader); + } + + + /**************** helper **************************/ + + protected void assertKeyAndValueEqual(Cell expected, Cell actual) { + // assert keys are equal (doesn't compare values) + Assert.assertEquals(expected, actual); + if (includeMemstoreTS) { + Assert.assertEquals(expected.getMvccVersion(), actual.getMvccVersion()); + } + // assert values equal + Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(), + expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(), + actual.getValueLength())); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java new file mode 100644 index 0000000..bd6f02b --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestRowDataComplexQualifiers extends BaseTestRowData{ + + static byte[] + Arow = Bytes.toBytes("Arow"), + cf = PrefixTreeTestConstants.TEST_CF, + v0 = Bytes.toBytes("v0"); + + static List qualifiers = Lists.newArrayList(); + static { + List qualifierStrings = Lists.newArrayList(); + qualifierStrings.add("cq"); + qualifierStrings.add("cq0"); + qualifierStrings.add("cq1"); + qualifierStrings.add("cq2"); + qualifierStrings.add("dq0");// second root level fan + qualifierStrings.add("dq1");// nub + qualifierStrings.add("dq111");// leaf on nub + qualifierStrings.add("dq11111a");// leaf on leaf + for (String s : qualifierStrings) { + qualifiers.add(Bytes.toBytes(s)); + } + } + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + for (byte[] qualifier : qualifiers) { + d.add(new KeyValue(Arow, cf, qualifier, ts, v0)); + } + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java new file mode 100644 index 0000000..11cd10a --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +/* + * Goes beyond a trivial trie to add a branch on the "cf" node + */ +public class TestRowDataDeeper extends BaseTestRowData{ + + static byte[] + cdc = Bytes.toBytes("cdc"), + cf6 = Bytes.toBytes("cf6"), + cfc = Bytes.toBytes("cfc"), + f = Bytes.toBytes("f"), + q = Bytes.toBytes("q"), + v = Bytes.toBytes("v"); + + static long + ts = 55L; + + static List d = Lists.newArrayList(); + static{ + d.add(new KeyValue(cdc, f, q, ts, v)); + d.add(new KeyValue(cf6, f, q, ts, v)); + d.add(new KeyValue(cfc, f, q, ts, v)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + //0: token:c; fan:d,f + //1: token:f; fan:6,c + //2: leaves + Assert.assertEquals(3, blockMeta.getRowTreeDepth()); + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + /** + * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only + * rA, but rB as well. + */ + KeyValue cfcRow = KeyValue.createFirstOnRow(Bytes.toBytes("cfc")); + CellScannerPosition position = searcher.positionAtOrAfter(cfcRow); + Assert.assertEquals(CellScannerPosition.AFTER, position); + Assert.assertEquals(d.get(2), searcher.current()); + searcher.previous(); + Assert.assertEquals(d.get(1), searcher.current()); + } +} + + diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java new file mode 100644 index 0000000..8b729bc --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + */ +public class TestRowDataDifferentTimestamps extends BaseTestRowData{ + + static byte[] + Arow = Bytes.toBytes("Arow"), + Brow = Bytes.toBytes("Brow"), + cf = Bytes.toBytes("fammy"), + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1"), + v0 = Bytes.toBytes("v0"); + + static List d = Lists.newArrayList(); + static{ + KeyValue kv0 = new KeyValue(Arow, cf, cq0, 0L, v0); + kv0.setMvccVersion(123456789L); + d.add(kv0); + + KeyValue kv1 = new KeyValue(Arow, cf, cq1, 1L, v0); + kv1.setMvccVersion(3L); + d.add(kv1); + + KeyValue kv2 = new KeyValue(Brow, cf, cq0, 12345678L, v0); + kv2.setMvccVersion(65537L); + d.add(kv2); + + //watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders +// d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0)); + KeyValue kv3 = new KeyValue(Brow, cf, cq1, Long.MAX_VALUE-1, v0); + kv3.setMvccVersion(1L); + d.add(kv3); + + KeyValue kv4 = new KeyValue(Brow, cf, cq1, 999999999, v0); + //don't set memstoreTS + d.add(kv4); + + KeyValue kv5 = new KeyValue(Brow, cf, cq1, 12345, v0); + kv5.setMvccVersion(0L); + d.add(kv5); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0); + Assert.assertEquals(12, blockMeta.getNumValueBytes()); + + Assert.assertFalse(blockMeta.isAllSameTimestamp()); + Assert.assertNotNull(blockMeta.getMinTimestamp()); + Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0); + Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0); + + Assert.assertFalse(blockMeta.isAllSameMvccVersion()); + Assert.assertNotNull(blockMeta.getMinMvccVersion()); + Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0); + Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java new file mode 100644 index 0000000..3bb23fa --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataEmpty extends BaseTestRowData{ + + private static byte[] b = new byte[0]; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(b, b, b, 0L, Type.Put, b)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java new file mode 100644 index 0000000..9b7353d --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + * + * http://pastebin.com/7ks8kzJ2 + * http://pastebin.com/MPn03nsK + */ +public class TestRowDataExerciseFInts extends BaseTestRowData{ + + static List rows; + static{ + List rowStrings = new ArrayList(); + rowStrings.add("com.edsBlog/directoryAa/pageAaa"); + rowStrings.add("com.edsBlog/directoryAa/pageBbb"); + rowStrings.add("com.edsBlog/directoryAa/pageCcc"); + rowStrings.add("com.edsBlog/directoryAa/pageDdd"); + rowStrings.add("com.edsBlog/directoryBb/pageEee"); + rowStrings.add("com.edsBlog/directoryBb/pageFff"); + rowStrings.add("com.edsBlog/directoryBb/pageGgg"); + rowStrings.add("com.edsBlog/directoryBb/pageHhh"); + rowStrings.add("com.isabellasBlog/directoryAa/pageAaa"); + rowStrings.add("com.isabellasBlog/directoryAa/pageBbb"); + rowStrings.add("com.isabellasBlog/directoryAa/pageCcc"); + rowStrings.add("com.isabellasBlog/directoryAa/pageDdd"); + rowStrings.add("com.isabellasBlog/directoryBb/pageEee"); + rowStrings.add("com.isabellasBlog/directoryBb/pageFff"); + rowStrings.add("com.isabellasBlog/directoryBb/pageGgg"); + rowStrings.add("com.isabellasBlog/directoryBb/pageHhh"); + ByteRangeTreeSet ba = new ByteRangeTreeSet(); + for(String row : rowStrings){ + ba.add(new ByteRange(Bytes.toBytes(row))); + } + rows = ba.compile().getSortedRanges(); + } + + static List cols = Lists.newArrayList(); + static{ + cols.add("Chrome"); + cols.add("Chromeb"); + cols.add("Firefox"); + cols.add("InternetExplorer"); + cols.add("Opera"); + cols.add("Safari"); + cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier"); + cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + } + + static long ts = 1234567890; + + static int MAX_VALUE = 50; + + static List kvs = Lists.newArrayList(); + static { + for (ByteRange row : rows) { + for (String col : cols) { + KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF, + Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE")); + kvs.add(kv); + } + } + } + + @Override + public List getInputs() { + return kvs; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1); + Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java 
b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java new file mode 100644 index 0000000..ab848d6 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestRowDataMultiFamilies extends BaseTestRowData{ + + static byte[] + rowA = Bytes.toBytes("rowA"), + rowB = Bytes.toBytes("rowB"), + famA = Bytes.toBytes("famA"), + famB = Bytes.toBytes("famB"), + famBB = Bytes.toBytes("famBB"), + q0 = Bytes.toBytes("q0"), + q1 = Bytes.toBytes("q1"),//start with a different character + vvv = Bytes.toBytes("vvv"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rowA, famA, q0, ts, vvv)); + d.add(new KeyValue(rowA, famB, q1, ts, vvv)); + d.add(new KeyValue(rowA, famBB, q0, ts, vvv)); + d.add(new KeyValue(rowB, famA, q0, ts, vvv)); + d.add(new KeyValue(rowB, famA, q1, ts, vvv)); + d.add(new KeyValue(rowB, famB, q0, ts, vvv)); + d.add(new KeyValue(rowB, famBB, q0, ts, vvv)); + d.add(new KeyValue(rowB, famBB, q1, ts, vvv)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataNub.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataNub.java new file mode 100644 index 0000000..ad19cd4 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataNub.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestRowDataNub extends BaseTestRowData{ + + static byte[] + rowA = Bytes.toBytes("rowA"), + rowB = Bytes.toBytes("rowB"),//nub + rowBB = Bytes.toBytes("rowBB"), + cf = PrefixTreeTestConstants.TEST_CF, + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1"), + v0 = Bytes.toBytes("v0"); + + static long + ts = 55L; + + static List d = Lists.newArrayList(); + static{ + d.add(new KeyValue(rowA, cf, cq0, ts, v0)); + d.add(new KeyValue(rowA, cf, cq1, ts, v0)); + d.add(new KeyValue(rowB, cf, cq0, ts, v0)); + d.add(new KeyValue(rowB, cf, cq1, ts, v0)); + d.add(new KeyValue(rowBB, cf, cq0, ts, v0)); + d.add(new KeyValue(rowBB, cf, cq1, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java new file mode 100644 index 0000000..e508c2c --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestRowDataNumberStrings extends BaseTestRowData{ + + static List d = Lists.newArrayList(); + static { + + /** + * Test a string-encoded list of numbers. 0, 1, 10, 11 will sort as 0, 1, 10, 11 if strings + *

    + * This helped catch a bug with reverse scanning where it was jumping from the last leaf cell to + * the previous nub. It should do 11->10, but it was incorrectly doing 11->1 + */ + List problematicSeries = Lists.newArrayList(0, 1, 10, 11);//sort this at the end + for(Integer i : problematicSeries){ +// for(int i=0; i < 13; ++i){ + byte[] row = Bytes.toBytes(""+i); + byte[] family = Bytes.toBytes("F"); + byte[] column = Bytes.toBytes("C"); + byte[] value = Bytes.toBytes("V"); + + d.add(new KeyValue(row, family, column, 0L, Type.Put, value)); + } + Collections.sort(d, new CellComparator()); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java new file mode 100644 index 0000000..a8c4646 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
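// Reviewer note (illustrative, not in the patch): sorted as strings the rows above come out
// "0" < "1" < "10" < "11", and "1" is a prefix of "10" and "11", so it becomes a nub in the row
// trie. A correct reverse scan from the last leaf must therefore step "11" -> "10" before ever
// reaching the nub "1"; jumping straight from "11" to "1" is the regression this data set guards
// against.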
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestRowDataQualifierByteOrdering extends BaseTestRowData{ + + static byte[] + Arow = Bytes.toBytes("Arow"), + Brow = Bytes.toBytes("Brow"), + Brow2 = Bytes.toBytes("Brow2"), + fam = Bytes.toBytes("HappyFam"), + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1tail"),//make sure tail does not come back as liat + cq2 = Bytes.toBytes("cq2"), + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(Arow, fam, cq0, ts, v0)); + d.add(new KeyValue(Arow, fam, cq1, ts, v0)); + d.add(new KeyValue(Brow, fam, cq0, ts, v0)); + d.add(new KeyValue(Brow, fam, cq2, ts, v0)); + d.add(new KeyValue(Brow2, fam, cq1, ts, v0)); + d.add(new KeyValue(Brow2, fam, cq2, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java new file mode 100644 index 0000000..5834d1b --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; + +import com.google.common.collect.Lists; + +public class TestRowDataRandomKeyValues extends BaseTestRowData { + + static List d = Lists.newArrayList(); + static RedundantKVGenerator generator = new RedundantKVGenerator(); + static { + d = generator.generateTestKeyValues(1 << 10); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java new file mode 100644 index 0000000..aad32d8 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +public class TestRowDataSearcherRowMiss extends BaseTestRowData{ + + static byte[] + //don't let the rows share any common prefix bytes + A = Bytes.toBytes("A"), + AA = Bytes.toBytes("AA"), + AAA = Bytes.toBytes("AAA"), + B = Bytes.toBytes("B"), + cf = Bytes.toBytes("fam"), + cq = Bytes.toBytes("cq0"), + v = Bytes.toBytes("v0"); + + static long + ts = 55L; + + static List d = Lists.newArrayList(); + static{ + d.add(new KeyValue(A, cf, cq, ts, v)); + d.add(new KeyValue(AA, cf, cq, ts, v)); + d.add(new KeyValue(AAA, cf, cq, ts, v)); + d.add(new KeyValue(B, cf, cq, ts, v)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + assertRowOffsetsCorrect(); + + searcher.resetToBeforeFirstEntry(); + + //test first cell + searcher.advance(); + Cell first = searcher.current(); + Assert.assertTrue(CellComparator.equals(d.get(0), first)); + + //test first cell in second row + Assert.assertTrue(searcher.positionAt(d.get(1))); + Assert.assertTrue(CellComparator.equals(d.get(1), searcher.current())); + + testBetween1and2(searcher); + testBetween2and3(searcher); + } + + /************ private methods, call from above *******************/ + + private void assertRowOffsetsCorrect(){ + Assert.assertEquals(4, getRowStartIndexes().size()); + } + + private void testBetween1and2(CellSearcher searcher){ + CellScannerPosition p;//reuse + Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts-2, v); + + //test exact + Assert.assertFalse(searcher.positionAt(betweenAAndAAA)); + + //test atOrBefore + p = searcher.positionAtOrBefore(betweenAAndAAA); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(1))); + + //test atOrAfter + p = searcher.positionAtOrAfter(betweenAAndAAA); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(2))); + } + + private void testBetween2and3(CellSearcher searcher){ + CellScannerPosition p;//reuse + Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts-2, v); + + //test exact + Assert.assertFalse(searcher.positionAt(betweenAAAndB)); + + //test atOrBefore + p = searcher.positionAtOrBefore(betweenAAAndB); + 
Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(2))); + + //test atOrAfter + p = searcher.positionAtOrAfter(betweenAAAndB); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(3))); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSimple.java new file mode 100644 index 0000000..7fbde65 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSimple.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +public class TestRowDataSimple extends BaseTestRowData { + + static byte[] + // don't let the rows share any common prefix bytes + rowA = Bytes.toBytes("Arow"), + rowB = Bytes.toBytes("Brow"), cf = Bytes.toBytes("fam"), + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1tail"),// make sure tail does not come back as liat + cq2 = Bytes.toBytes("dcq2"),// start with a different character + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rowA, cf, cq0, ts, v0)); + d.add(new KeyValue(rowA, cf, cq1, ts, v0)); + d.add(new KeyValue(rowA, cf, cq2, ts, v0)); + d.add(new KeyValue(rowB, cf, cq0, ts, v0)); + d.add(new KeyValue(rowB, cf, cq1, ts, v0)); + d.add(new KeyValue(rowB, cf, cq2, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + CellScannerPosition p;// reuse + searcher.resetToBeforeFirstEntry(); + + // test first cell + searcher.advance(); + Cell first = searcher.current(); + Assert.assertTrue(CellComparator.equals(d.get(0), first)); + + // test first cell in second row + Assert.assertTrue(searcher.positionAt(d.get(3))); + Assert.assertTrue(CellComparator.equals(d.get(3), searcher.current())); + + Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0); + + // test exact + 
Assert.assertFalse(searcher.positionAt(between4And5)); + + // test atOrBefore + p = searcher.positionAtOrBefore(between4And5); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(4))); + + // test atOrAfter + p = searcher.positionAtOrAfter(between4And5); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(5))); + + // test when key falls before first key in block + Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0); + Assert.assertFalse(searcher.positionAt(beforeFirst)); + p = searcher.positionAtOrBefore(beforeFirst); + Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p); + p = searcher.positionAtOrAfter(beforeFirst); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(0))); + Assert.assertEquals(d.get(0), searcher.current()); + + // test when key falls after last key in block + Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0);// must be lower case z + Assert.assertFalse(searcher.positionAt(afterLast)); + p = searcher.positionAtOrAfter(afterLast); + Assert.assertEquals(CellScannerPosition.AFTER_LAST, p); + p = searcher.positionAtOrBefore(afterLast); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.current(), CollectionUtils.getLast(d))); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java new file mode 100644 index 0000000..9944057 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
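// Reviewer note (summary of the CellSearcher contract exercised by the two searcher tests above;
// behaviour inferred from the assertions, not from the searcher implementation itself):
//   positionAt(cell)         -> true only on an exact match;
//   positionAtOrBefore(cell) -> BEFORE on a miss, parked on the nearest preceding cell;
//   positionAtOrAfter(cell)  -> AFTER on a miss, parked on the nearest following cell;
//   keys outside the block   -> BEFORE_FIRST / AFTER_LAST (current() may then be null, see
//                               TestRowDataTrivial further down).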
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +public class TestRowDataSingleQualifier extends BaseTestRowData{ + + static byte[] + rowA = Bytes.toBytes("rowA"), + rowB = Bytes.toBytes("rowB"), + cf = PrefixTreeTestConstants.TEST_CF, + cq0 = Bytes.toBytes("cq0"), + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rowA, cf, cq0, ts, v0)); + d.add(new KeyValue(rowB, cf, cq0, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java new file mode 100644 index 0000000..1e86b78 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +public class TestRowDataTrivial extends BaseTestRowData{ + + static byte[] + rA = Bytes.toBytes("rA"), + rB = Bytes.toBytes("rB"),//turn "r" into a branch for the Searcher tests + cf = Bytes.toBytes("fam"), + cq0 = Bytes.toBytes("q0"), + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rA, cf, cq0, ts, v0)); + d.add(new KeyValue(rB, cf, cq0, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + // node[0] -> root[r] + // node[1] -> leaf[A], etc + Assert.assertEquals(2, blockMeta.getRowTreeDepth()); + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + /** + * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only rA, + * but rB as well. 
+ */ + KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz")); + CellScannerPosition position = searcher.positionAtOrAfter(afterLast); + Assert.assertEquals(CellScannerPosition.AFTER_LAST, position); + Assert.assertNull(searcher.current()); + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataUrls.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataUrls.java new file mode 100644 index 0000000..692e700 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataUrls.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + * + * http://pastebin.com/7ks8kzJ2 + * http://pastebin.com/MPn03nsK + */ +public class TestRowDataUrls extends BaseTestRowData{ + + static List rows; + static{ + List rowStrings = new ArrayList(); + rowStrings.add("com.edsBlog/directoryAa/pageAaa"); + rowStrings.add("com.edsBlog/directoryAa/pageBbb"); + rowStrings.add("com.edsBlog/directoryAa/pageCcc"); + rowStrings.add("com.edsBlog/directoryAa/pageDdd"); + rowStrings.add("com.edsBlog/directoryBb/pageEee"); + rowStrings.add("com.edsBlog/directoryBb/pageFff"); + rowStrings.add("com.edsBlog/directoryBb/pageGgg"); + rowStrings.add("com.edsBlog/directoryBb/pageHhh"); + rowStrings.add("com.isabellasBlog/directoryAa/pageAaa"); + rowStrings.add("com.isabellasBlog/directoryAa/pageBbb"); + rowStrings.add("com.isabellasBlog/directoryAa/pageCcc"); + rowStrings.add("com.isabellasBlog/directoryAa/pageDdd"); + rowStrings.add("com.isabellasBlog/directoryBb/pageEee"); + rowStrings.add("com.isabellasBlog/directoryBb/pageFff"); + rowStrings.add("com.isabellasBlog/directoryBb/pageGgg"); + rowStrings.add("com.isabellasBlog/directoryBb/pageHhh"); + ByteRangeTreeSet ba = new ByteRangeTreeSet(); + for (String row : rowStrings) { + ba.add(new ByteRange(Bytes.toBytes(row))); + } + rows = ba.compile().getSortedRanges(); + } + + static List cols = Lists.newArrayList(); + static { + cols.add("Chrome"); + cols.add("Chromeb"); + cols.add("Firefox"); + cols.add("InternetExplorer"); + cols.add("Opera"); + cols.add("Safari"); + } + + static long ts = 1234567890; + + static int 
MAX_VALUE = 50; + + static List kvs = Lists.newArrayList(); + static { + for (ByteRange row : rows) { + for (String col : cols) { + KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF, + Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE")); + kvs.add(kv); + // System.out.println("TestRows5:"+kv); + } + } + } + + @Override + public List getInputs() { + return kvs; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java new file mode 100644 index 0000000..543afb6 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnNodeWriter; +import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowNodeWriter; +import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + * + * http://pastebin.com/7ks8kzJ2 + * http://pastebin.com/MPn03nsK + */ +public class TestRowDataUrlsExample extends BaseTestRowData{ + + static String TENANT_ID = Integer.toString(95322); + static String APP_ID = Integer.toString(12); + static List URLS = Lists.newArrayList( + "com.dablog/2011/10/04/boating", + "com.dablog/2011/10/09/lasers", + "com.jamiesrecipes", //this nub helped find a bug + "com.jamiesrecipes/eggs"); + static String FAMILY = "hits"; + static List BROWSERS = Lists.newArrayList( + "Chrome", "IE8", "IE9beta");//, "Opera", "Safari"); + static long TIMESTAMP = 1234567890; + + static int MAX_VALUE = 50; + + static List kvs = Lists.newArrayList(); + static{ + for(String rowKey : URLS){ + for(String qualifier : BROWSERS){ + KeyValue kv = new KeyValue( + Bytes.toBytes(rowKey), + Bytes.toBytes(FAMILY), + Bytes.toBytes(qualifier), + TIMESTAMP, + KeyValue.Type.Put, + Bytes.toBytes("VvvV")); + kvs.add(kv); + } + } + } + + /** + * Used for generating docs. + */ + public static void main(String... 
args) throws IOException{ + System.out.println("-- inputs --"); + System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true)); + ByteArrayOutputStream os = new ByteArrayOutputStream(1<<20); + PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false); + + for(KeyValue kv : kvs){ + encoder.write(kv); + } + encoder.flush(); + + System.out.println("-- qualifier SortedPtBuilderNodes --"); + for(TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()){ + System.out.println(tokenizer); + } + for(TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()){ + System.out.println(tokenizerNode); + } + + System.out.println("-- qualifier PtColumnNodeWriters --"); + for(ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()){ + System.out.println(writer); + } + + System.out.println("-- rowKey SortedPtBuilderNodes --"); + for(TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()){ + System.out.println(tokenizerNode); + } + for(TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()){ + System.out.println(tokenizerNode); + } + + System.out.println("-- row PtRowNodeWriters --"); + for(RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()){ + System.out.println(writer); + } + for(RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()){ + System.out.println(writer); + } + + System.out.println("-- concatenated values --"); + System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray())); + } + + @Override + public List getInputs() { + return kvs; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/TestTimestampData.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/TestTimestampData.java new file mode 100644 index 0000000..f26c5b8 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/TestTimestampData.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
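// Reviewer note (usage sketch, assumptions flagged): the main() in TestRowDataUrlsExample above
// only dumps the encoder's tokenizer nodes, column/row node writers and concatenated values to
// stdout for documentation. One way to run it from the hbase-prefix-tree module would be the
// exec-maven-plugin (not configured by this patch; shown here purely as an assumption):
//
//   mvn -q test-compile exec:java \
//       -Dexec.mainClass=org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample \
//       -Dexec.classpathScope=test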
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.timestamp; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic; +import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers; +import org.apache.hadoop.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats; + +import com.google.common.collect.Lists; + +public interface TestTimestampData { + + List getInputs(); + long getMinimum(); + List getOutputs(); + + public static class InMemory { + public Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + all.add(new Object[] { new TestTimestampDataBasic() }); + all.add(new Object[] { new TestTimestampDataNumbers() }); + all.add(new Object[] { new TestTimestampDataRepeats() }); + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java new file mode 100644 index 0000000..da41cbc --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.timestamp; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestTimestampEncoder { + + @Parameters + public static Collection parameters() { + return new TestTimestampData.InMemory().getAllAsObjectArray(); + } + + private TestTimestampData timestamps; + private PrefixTreeBlockMeta blockMeta; + private LongEncoder encoder; + private byte[] bytes; + private TimestampDecoder decoder; + + public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException { + this.timestamps = testTimestamps; + this.blockMeta = new PrefixTreeBlockMeta(); + this.blockMeta.setNumMetaBytes(0); + this.blockMeta.setNumRowBytes(0); + this.blockMeta.setNumQualifierBytes(0); + this.encoder = new LongEncoder(); + for (Long ts : testTimestamps.getInputs()) { + encoder.add(ts); + } + encoder.compile(); + blockMeta.setTimestampFields(encoder); + bytes = encoder.getByteArray(); + decoder = new TimestampDecoder(); + decoder.initOnBlock(blockMeta, bytes); + } + + @Test + public void testCompressorMinimum() { + Assert.assertEquals(timestamps.getMinimum(), encoder.getMin()); + } + + @Test + public void testCompressorRoundTrip() { + long[] outputs = encoder.getSortedUniqueTimestamps(); + for (int i = 0; i < timestamps.getOutputs().size(); ++i) { + long input = timestamps.getOutputs().get(i); + long output = outputs[i]; + Assert.assertEquals(input, output); + } + } + + @Test + public void testReaderMinimum() { + Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0)); + } + + @Test + public void testReaderRoundTrip() { + for (int i = 0; i < timestamps.getOutputs().size(); ++i) { + long input = timestamps.getOutputs().get(i); + long output = decoder.getLong(i); + Assert.assertEquals(input, output); + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java new file mode 100644 index 0000000..f11fab4 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
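// Reviewer note (not in the patch): the three parameterized data sets driving TestTimestampEncoder
// cover distinct small values (Basic), values spread across a wider range via a bit shift
// (Numbers), and all-identical values (Repeats). Repeats.getOutputs() returns an empty list, so
// its per-index round-trip loops are vacuous and only the minimum is verified for that case.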
+ */ + +package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData; + +public class TestTimestampDataBasic implements TestTimestampData { + + @Override + public List getInputs() { + List d = new ArrayList(); + d.add(5L); + d.add(3L); + d.add(0L); + d.add(1L); + d.add(3L); + return d; + } + + @Override + public long getMinimum() { + return 0L; + } + + @Override + public List getOutputs() { + List d = new ArrayList(); + d.add(0L); + d.add(1L); + d.add(3L); + d.add(5L); + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java new file mode 100644 index 0000000..f5ed89d --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData; + +public class TestTimestampDataNumbers implements TestTimestampData { + + private int shift = 8; + + @Override + public List getInputs() { + List d = new ArrayList(); + d.add(5L << shift); + d.add(3L << shift); + d.add(7L << shift); + d.add(1L << shift); + d.add(3L << shift); + return d; + } + + @Override + public long getMinimum() { + return 1L << shift; + } + + @Override + public List getOutputs() { + List d = new ArrayList(); + d.add(1L << shift); + d.add(3L << shift); + d.add(5L << shift); + d.add(7L << shift); + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java new file mode 100644 index 0000000..69548d2 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.codec.prefixtree.timestamp.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.codec.prefixtree.timestamp.TestTimestampData; + +public class TestTimestampDataRepeats implements TestTimestampData { + + private static long t = 1234567890L; + + @Override + public List getInputs() { + List d = new ArrayList(); + d.add(t); + d.add(t); + d.add(t); + d.add(t); + d.add(t); + return d; + } + + @Override + public long getMinimum() { + return t; + } + + @Override + public List getOutputs() { + List d = new ArrayList(); + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/bytes/TestByteRange.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/bytes/TestByteRange.java new file mode 100644 index 0000000..e8c1d90 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/bytes/TestByteRange.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.bytes; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.junit.Test; + +public class TestByteRange { + + @Test + public void testConstructor() { + ByteRange b = new ByteRange(new byte[] { 0, 1, 2 }); + Assert.assertEquals(3, b.getLength()); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/comparator/ByteArrayComparator.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/comparator/ByteArrayComparator.java new file mode 100644 index 0000000..9a81d90 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/comparator/ByteArrayComparator.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.comparator; + +import java.util.Comparator; + +import org.apache.hadoop.hbase.util.Bytes; + +public class ByteArrayComparator implements Comparator { + + @Override + public int compare(byte[] a, byte[] b) { + return Bytes.compareTo(a, b); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/number/NumberFormatter.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/number/NumberFormatter.java new file mode 100644 index 0000000..4aaea61 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/number/NumberFormatter.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.number; + +import java.text.DecimalFormat; + +public class NumberFormatter { + + public static String addCommas(final Number pValue) { + if (pValue == null) { + return null; + } + String format = "###,###,###,###,###,###,###,###.#####################"; + return new DecimalFormat(format).format(pValue);// biggest is 19 digits + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/number/RandomNumberUtils.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/number/RandomNumberUtils.java new file mode 100644 index 0000000..f2f06e6 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/number/RandomNumberUtils.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.util.number; + +import java.util.Random; + +public class RandomNumberUtils { + + public static long nextPositiveLong(Random random) { + while (true) { + long value = random.nextLong(); + if (value > 0) { + return value; + } + } + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestFIntTool.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestFIntTool.java new file mode 100644 index 0000000..03ba018 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestFIntTool.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.vint; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.hadoop.hbase.util.vint.UFIntTool; +import org.junit.Assert; +import org.junit.Test; + +/********************** tests *************************/ + +public class TestFIntTool { + @Test + public void testLeadingZeros() { + Assert.assertEquals(64, Long.numberOfLeadingZeros(0)); + Assert.assertEquals(63, Long.numberOfLeadingZeros(1)); + Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE)); + Assert.assertEquals(0, Long.numberOfLeadingZeros(-1)); + Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE)); + Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1)); + } + + @Test + public void testMaxValueForNumBytes() { + Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1)); + Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2)); + Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3)); + Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7)); + } + + @Test + public void testNumBytes() { + Assert.assertEquals(1, UFIntTool.numBytes(0)); + Assert.assertEquals(1, UFIntTool.numBytes(1)); + Assert.assertEquals(1, UFIntTool.numBytes(255)); + Assert.assertEquals(2, UFIntTool.numBytes(256)); + Assert.assertEquals(2, UFIntTool.numBytes(65535)); + Assert.assertEquals(3, UFIntTool.numBytes(65536)); + Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL)); + Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L)); + Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE)); + Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE)); + Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1)); + } + + @Test + public void testGetBytes() { + Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0)); + Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1)); + Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255)); + Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256)); + Assert.assertArrayEquals(new byte[] { 1, 3 
}, UFIntTool.getBytes(2, 256 + 3)); + Assert.assertArrayEquals(new byte[] { 1, -128 }, UFIntTool.getBytes(2, 256 + 128)); + Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 }, + UFIntTool.getBytes(4, Integer.MAX_VALUE)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, + UFIntTool.getBytes(8, Long.MAX_VALUE)); + } + + @Test + public void testFromBytes() { + Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 })); + Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 })); + Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 })); + Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 })); + Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 })); + Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 })); + Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 })); + Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 })); + Assert.assertEquals(Long.MAX_VALUE, + UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 })); + } + + @Test + public void testRoundTrips() { + long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL, + 0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE, + (long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE }; + for (int i = 0; i < values.length; ++i) { + Assert.assertEquals(values[i], UFIntTool.fromBytes(UFIntTool.getBytes(8, values[i]))); + } + } + + @Test + public void testWriteBytes() throws IOException {// copied from testGetBytes + Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0)); + Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1)); + Assert.assertArrayEquals(new byte[] { -1 }, bytesViaOutputStream(1, 255)); + Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 256)); + Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 256 + 3)); + Assert.assertArrayEquals(new byte[] { 1, -128 }, bytesViaOutputStream(2, 256 + 128)); + Assert.assertArrayEquals(new byte[] { 1, -1 }, bytesViaOutputStream(2, 256 + 255)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 }, + bytesViaOutputStream(4, Integer.MAX_VALUE)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, + bytesViaOutputStream(8, Long.MAX_VALUE)); + } + + private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + UFIntTool.writeBytes(outputWidth, value, os); + return os.toByteArray(); + } +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestVIntTool.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestVIntTool.java new file mode 100644 index 0000000..9d78d7f --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestVIntTool.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.vint; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hbase.util.vint.UVIntTool; +import org.junit.Assert; +import org.junit.Test; + +public class TestVIntTool { + + @Test + public void testNumBytes() { + Assert.assertEquals(1, UVIntTool.numBytes(0)); + Assert.assertEquals(1, UVIntTool.numBytes(1)); + Assert.assertEquals(1, UVIntTool.numBytes(100)); + Assert.assertEquals(1, UVIntTool.numBytes(126)); + Assert.assertEquals(1, UVIntTool.numBytes(127)); + Assert.assertEquals(2, UVIntTool.numBytes(128)); + Assert.assertEquals(2, UVIntTool.numBytes(129)); + Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE)); + } + + @Test + public void testWriteBytes() throws IOException { + Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0)); + Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1)); + Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63)); + Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127)); + Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128)); + Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155)); + Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE)); + } + + private byte[] bytesViaOutputStream(int value) throws IOException { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + UVIntTool.writeBytes(value, os); + return os.toByteArray(); + } + + @Test + public void testToBytes() { + Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0)); + Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1)); + Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63)); + Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127)); + Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128)); + Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155)); + Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE)); + } + + @Test + public void testFromBytes() { + Assert.assertEquals(Integer.MAX_VALUE, UVIntTool.getInt(UVIntTool.MAX_VALUE_BYTES)); + } + + @Test + public void testRoundTrips() { + Random random = new Random(); + for (int i = 0; i < 10000; ++i) { + int value = random.nextInt(Integer.MAX_VALUE); + byte[] bytes = UVIntTool.getBytes(value); + int roundTripped = UVIntTool.getInt(bytes); + Assert.assertEquals(value, roundTripped); + } + } + + @Test + public void testInputStreams() throws IOException { + ByteArrayInputStream is; + is = new ByteArrayInputStream(new byte[] { 0 }); + Assert.assertEquals(0, UVIntTool.getInt(is)); + is = new ByteArrayInputStream(new byte[] { 5 }); + Assert.assertEquals(5, UVIntTool.getInt(is)); + is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 }); + Assert.assertEquals(155, UVIntTool.getInt(is)); + } + +} \ No newline at end of file diff --git 
a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestVLongTool.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestVLongTool.java new file mode 100644 index 0000000..8d4c4c8 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/util/vint/TestVLongTool.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util.vint; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hbase.util.number.RandomNumberUtils; +import org.apache.hadoop.hbase.util.vint.UVLongTool; +import org.junit.Assert; +import org.junit.Test; + +public class TestVLongTool { + + @Test + public void testNumBytes() { + Assert.assertEquals(1, UVLongTool.numBytes(0)); + Assert.assertEquals(1, UVLongTool.numBytes(1)); + Assert.assertEquals(1, UVLongTool.numBytes(100)); + Assert.assertEquals(1, UVLongTool.numBytes(126)); + Assert.assertEquals(1, UVLongTool.numBytes(127)); + Assert.assertEquals(2, UVLongTool.numBytes(128)); + Assert.assertEquals(2, UVLongTool.numBytes(129)); + Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE)); + } + + @Test + public void testToBytes() { + Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0)); + Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1)); + Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63)); + Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127)); + Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128)); + Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155)); + Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE)); + } + + @Test + public void testFromBytes() { + Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES)); + } + + @Test + public void testFromBytesOffset() { + Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES, 0)); + + long ms = 1318966363481L; +// System.out.println(ms); + byte[] bytes = UVLongTool.getBytes(ms); +// System.out.println(Arrays.toString(bytes)); + long roundTripped = UVLongTool.getLong(bytes, 0); + Assert.assertEquals(ms, roundTripped); + + int calculatedNumBytes = UVLongTool.numBytes(ms); + int actualNumBytes = bytes.length; + Assert.assertEquals(actualNumBytes, calculatedNumBytes); + + byte[] shiftedBytes = new byte[1000]; + int shift = 33; + System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length); + long shiftedRoundTrip = UVLongTool.getLong(shiftedBytes, shift); + Assert.assertEquals(ms, shiftedRoundTrip); + } + + @Test + public void testRoundTrips() { + Random random = new Random(); + for (int i = 0; i < 
10000; ++i) { + long value = RandomNumberUtils.nextPositiveLong(random); + byte[] bytes = UVLongTool.getBytes(value); + long roundTripped = UVLongTool.getLong(bytes); + Assert.assertEquals(value, roundTripped); + int calculatedNumBytes = UVLongTool.numBytes(value); + int actualNumBytes = bytes.length; + Assert.assertEquals(actualNumBytes, calculatedNumBytes); + } + } + + @Test + public void testInputStreams() throws IOException { + ByteArrayInputStream is; + is = new ByteArrayInputStream(new byte[] { 0 }); + Assert.assertEquals(0, UVLongTool.getLong(is)); + is = new ByteArrayInputStream(new byte[] { 5 }); + Assert.assertEquals(5, UVLongTool.getLong(is)); + is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 }); + Assert.assertEquals(155, UVLongTool.getLong(is)); + } +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java deleted file mode 100644 index e2e97a1..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.keyvalue; - -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueTestUtil; -import org.apache.hbase.codec.prefixtree.row.TestRowData; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) -public class TestKeyValueTool { - - @Parameters - public static Collection parameters() { - return new TestRowData.InMemory().getAllAsObjectArray(); - } - - private TestRowData rows; - - public TestKeyValueTool(TestRowData testRows) { - this.rows = testRows; - } - - @Test - public void testRoundTripToBytes() { - List kvs = rows.getInputs(); - ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false); - List roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false); - Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray()); - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java deleted file mode 100644 index 04087ea..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree; - -import org.apache.hadoop.hbase.util.Bytes; - -public class PrefixTreeTestConstants { - - public static final byte[] TEST_CF = Bytes.toBytes("cfDefault"); - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java deleted file mode 100644 index 688b65a..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.blockmeta; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.junit.Assert; -import org.junit.Test; - -public class TestBlockMeta { - - static int BLOCK_START = 123; - - private static PrefixTreeBlockMeta createSample() { - PrefixTreeBlockMeta m = new PrefixTreeBlockMeta(); - m.setNumMetaBytes(0); - m.setNumKeyValueBytes(3195); - - m.setNumRowBytes(0); - m.setNumFamilyBytes(3); - m.setNumQualifierBytes(12345); - m.setNumTimestampBytes(23456); - m.setNumMvccVersionBytes(5); - m.setNumValueBytes(34567); - - m.setNextNodeOffsetWidth(3); - m.setFamilyOffsetWidth(1); - m.setQualifierOffsetWidth(2); - m.setTimestampIndexWidth(1); - m.setMvccVersionIndexWidth(2); - m.setValueOffsetWidth(8); - m.setValueLengthWidth(3); - - m.setRowTreeDepth(11); - m.setMaxRowLength(200); - m.setMaxQualifierLength(50); - - m.setMinTimestamp(1318966363481L); - m.setTimestampDeltaWidth(3); - m.setMinMvccVersion(100L); - m.setMvccVersionDeltaWidth(4); - - m.setAllSameType(false); - m.setAllTypes(KeyValue.Type.Delete.getCode()); - - m.setNumUniqueRows(88); - m.setNumUniqueFamilies(1); - m.setNumUniqueQualifiers(56); - return m; - } - - @Test - public void testStreamSerialization() throws IOException { - PrefixTreeBlockMeta original = createSample(); - ByteArrayOutputStream os = new ByteArrayOutputStream(10000); - original.writeVariableBytesToOutputStream(os); - ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray()); - PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(buffer); - Assert.assertTrue(original.equals(roundTripped)); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java deleted file mode 100644 index 36f4325..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.builder; - -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) -public class TestTokenizer { - - @Parameters - public static Collection parameters() { - return new TestTokenizerData.InMemory().getAllAsObjectArray(); - } - - private List inputs; - private Tokenizer builder; - private List roundTripped; - - public TestTokenizer(TestTokenizerData sortedByteArrays) { - this.inputs = sortedByteArrays.getInputs(); - this.builder = new Tokenizer(); - for (byte[] array : inputs) { - builder.addSorted(new ByteRange(array)); - } - this.roundTripped = builder.getArrays(); - } - - @Test - public void testReaderRoundTrip() { - Assert.assertEquals(inputs.size(), roundTripped.size()); - Assert.assertTrue(Bytes.isSorted(roundTripped)); - Assert.assertTrue(Bytes.equals(inputs, roundTripped)); - } - - @Test - public void testSearching() { - for (byte[] input : inputs) { - TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult(); - builder.getNode(resultHolder, input, 0, input.length); - TokenizerNode n = resultHolder.getMatchingNode(); - byte[] output = n.getNewByteArray(); - Assert.assertTrue(Bytes.equals(input, output)); - } - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java deleted file mode 100644 index b6db64a..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.builder; - -import java.util.Collection; -import java.util.List; - -import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic; -import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase; - -import com.google.common.collect.Lists; - -public interface TestTokenizerData { - - List getInputs(); - List getOutputs(); - - public static class InMemory { - public Collection getAllAsObjectArray() { - List all = Lists.newArrayList(); - all.add(new Object[] { new TestTokenizerDataBasic() }); - all.add(new Object[] { new TestTokenizerDataEdgeCase() }); - return all; - } - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java deleted file mode 100644 index 28ea6d7..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.builder; - -import java.util.List; - -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; -import org.junit.Assert; -import org.junit.Test; -import org.mortbay.log.Log; - -import com.google.common.collect.Lists; - -public class TestTreeDepth { - - @Test - public void testSingleNode() { - List inputs = Lists.newArrayList("a"); - testInternal(inputs, 1); - } - - @Test - public void testSimpleBranch() { - List inputs = Lists.newArrayList("a", "aa", "ab"); - testInternal(inputs, 2); - } - - @Test - public void testEmptyRoot() { - List inputs = Lists.newArrayList("a", "b"); - testInternal(inputs, 2); - } - - @Test - public void testRootAsNub() { - List inputs = Lists.newArrayList("a", "aa"); - testInternal(inputs, 2); - } - - @Test - public void testRootAsNubPlusNub() { - List inputs = Lists.newArrayList("a", "aa", "aaa"); - testInternal(inputs, 3); - } - - @Test - public void testEmptyRootPlusNub() { - List inputs = Lists.newArrayList("a", "aa", "b"); - testInternal(inputs, 3); - } - - @Test - public void testSplitDistantAncestor() { - List inputs = Lists.newArrayList("a", "ac", "acd", "b"); - testInternal(inputs, 4); - } - - protected void testInternal(List inputs, int expectedTreeDepth) { - Log.warn("init logger"); - Tokenizer builder = new Tokenizer(); - for (String s : inputs) { - ByteRange b = new ByteRange(Bytes.toBytes(s)); - builder.addSorted(b); - } - Assert.assertEquals(1, builder.getRoot().getNodeDepth()); - Assert.assertEquals(expectedTreeDepth, builder.getTreeDepth()); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java deleted file mode 100644 index d717999..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.builder.data; - -import java.util.List; - -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData; - -import com.google.common.collect.Lists; - -public class TestTokenizerDataBasic implements TestTokenizerData { - - static List d = Lists.newArrayList(); - static { - List s = Lists.newArrayList(); - s.add("abc");// nub - s.add("abcde");// leaf - s.add("bbc");// causes root to split and have empty token - s.add("bbc");// makes numOccurrences=2 on the bbc node - s.add("cd");// just to get another node after the numOccurrences=2 - d = Bytes.getUtf8ByteArrays(s); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public List getOutputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java deleted file mode 100644 index 4c26649..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.builder.data; - -import java.util.List; - -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData; - -import com.google.common.collect.Lists; - -public class TestTokenizerDataEdgeCase implements TestTokenizerData { - - static List d = Lists.newArrayList(); - static { - /* - * tricky little combination because the acegi token will partially match abdfi, but when you - * descend into abdfi, it will not fully match - */ - List s = Lists.newArrayList(); - s.add("abdfh"); - s.add("abdfi"); - s.add("acegi"); - d = Bytes.getUtf8ByteArrays(s); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public List getOutputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java deleted file mode 100644 index 4dcb0a1..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.column; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.ByteRangeTool; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader; -import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; -import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -import com.google.common.collect.Lists; - -@RunWith(Parameterized.class) -public class TestColumnBuilder { - - @Parameters - public static Collection parameters() { - return new TestColumnData.InMemory().getAllAsObjectArray(); - } - - /*********** fields **********************************/ - - protected TestColumnData columns; - protected ByteRangeTreeSet columnSorter; - protected List sortedUniqueColumns; - protected PrefixTreeBlockMeta blockMeta; - protected Tokenizer builder; - protected ColumnSectionWriter writer; - protected byte[] bytes; - protected byte[] buffer; - protected ColumnReader reader; - - /*************** construct ****************************/ - - public TestColumnBuilder(TestColumnData columns) { - this.columns = columns; - List inputs = columns.getInputs(); - this.columnSorter = new ByteRangeTreeSet(inputs); - this.sortedUniqueColumns = columnSorter.compile().getSortedRanges(); - List copies = ByteRangeTool.copyToNewArrays(sortedUniqueColumns); - Assert.assertTrue(Bytes.isSorted(copies)); - this.blockMeta = new PrefixTreeBlockMeta(); - this.blockMeta.setNumMetaBytes(0); - this.blockMeta.setNumRowBytes(0); - this.builder = new Tokenizer(); - } - - /************* methods ********************************/ - - @Test - public void testReaderRoundTrip() throws IOException { - for (int i = 0; i < sortedUniqueColumns.size(); ++i) { - ByteRange column = sortedUniqueColumns.get(i); - builder.addSorted(column); - } - List builderOutputArrays = builder.getArrays(); - for (int i = 0; i < builderOutputArrays.size(); ++i) { - byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray(); - byte[] outputArray = builderOutputArrays.get(i); - boolean same = Bytes.equals(inputArray, outputArray); - Assert.assertTrue(same); - } - Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size()); - - writer = new ColumnSectionWriter(blockMeta, builder, false); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - writer.compile().writeBytes(baos); - bytes = baos.toByteArray(); - buffer 
= new byte[blockMeta.getMaxQualifierLength()]; - reader = new ColumnReader(buffer, false); - reader.initOnBlock(blockMeta, bytes); - - List builderNodes = Lists.newArrayList(); - builder.appendNodes(builderNodes, true, true); - int i = 0; - for (TokenizerNode builderNode : builderNodes) { - if (!builderNode.hasOccurrences()) { - continue; - } - Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to - // builder - int position = builderNode.getOutputArrayOffset(); - byte[] output = reader.populateBuffer(position).copyBufferToNewArray(); - boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output); - Assert.assertTrue(same); - ++i; - } - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java deleted file mode 100644 index 522a8ad..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.column; - -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataRandom; -import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataSimple; - -import com.google.common.collect.Lists; - -public interface TestColumnData { - - List getInputs(); - List getOutputs(); - - public static class InMemory { - public Collection getAllAsObjectArray() { - List all = Lists.newArrayList(); - all.add(new Object[] { new TestColumnDataSimple() }); - for (int leftShift = 0; leftShift < 16; ++leftShift) { - all.add(new Object[] { new TestColumnDataRandom(1 << leftShift) }); - } - return all; - } - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java deleted file mode 100644 index 7a66a73..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.column.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; -import org.apache.hbase.codec.prefixtree.column.TestColumnData; -import org.apache.hbase.util.byterange.ByteRangeSet; -import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; - -import com.google.common.collect.Lists; - -public class TestColumnDataRandom implements TestColumnData { - - private List inputs = Lists.newArrayList(); - private List outputs = Lists.newArrayList(); - - public TestColumnDataRandom(int numColumns) { - RedundantKVGenerator generator = new RedundantKVGenerator(); - ByteRangeSet sortedColumns = new ByteRangeTreeSet(); - List d = generator.generateTestKeyValues(numColumns); - for (KeyValue col : d) { - ByteRange colRange = new ByteRange(col.getQualifier()); - inputs.add(colRange); - sortedColumns.add(colRange); - } - for (ByteRange col : sortedColumns.compile().getSortedRanges()) { - outputs.add(col); - } - } - - @Override - public List getInputs() { - return inputs; - } - - @Override - public List getOutputs() { - return outputs; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java deleted file mode 100644 index ad5a2cd..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.column.data; - -import java.util.List; - -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.ByteRangeTool; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.column.TestColumnData; - -import com.google.common.collect.Lists; - -public class TestColumnDataSimple implements TestColumnData { - - @Override - public List getInputs() { - List d = Lists.newArrayList(); - d.add("abc"); - d.add("abcde"); - d.add("abc"); - d.add("bbc"); - d.add("abc"); - return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); - } - - @Override - public List getOutputs() { - List d = Lists.newArrayList(); - d.add("abc"); - d.add("abcde"); - d.add("bbc"); - return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java deleted file mode 100644 index 9c3fcf9..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row; - -import java.util.List; - -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; - -import com.google.common.collect.Lists; - -public abstract class BaseTestRowData implements TestRowData { - - @Override - public List getRowStartIndexes() { - List rowStartIndexes = Lists.newArrayList(); - rowStartIndexes.add(0); - List inputs = getInputs(); - for (int i = 1; i < inputs.size(); ++i) { - KeyValue lastKv = inputs.get(i - 1); - KeyValue kv = inputs.get(i); - if (!CellComparator.equalsRow(lastKv, kv)) { - rowStartIndexes.add(i); - } - } - return rowStartIndexes; - } - - @Override - public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { - } - - @Override - public void individualSearcherAssertions(CellSearcher searcher) { - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java deleted file mode 100644 index b138d1e..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hbase.codec.prefixtree.decode.DecoderFactory; -import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) -public class TestPrefixTreeSearcher { - - protected static int BLOCK_START = 7; - - @Parameters - public static Collection parameters() { - return new TestRowData.InMemory().getAllAsObjectArray(); - } - - protected TestRowData rows; - protected ByteBuffer block; - - public TestPrefixTreeSearcher(TestRowData testRows) throws IOException { - this.rows = testRows; - ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20); - PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true); - for (KeyValue kv : rows.getInputs()) { - kvBuilder.write(kv); - } - kvBuilder.flush(); - byte[] outputBytes = os.toByteArray(); - this.block = ByteBuffer.wrap(outputBytes); - } - - - @Test - public void testScanForwards() throws IOException { - CellSearcher searcher = null; - try { - searcher = DecoderFactory.checkOut(block, true); - - int i = -1; - while (searcher.advance()) { - ++i; - KeyValue inputCell = rows.getInputs().get(i); - Cell outputCell = searcher.current(); - - // check all 3 permutations of equals() - Assert.assertEquals(inputCell, outputCell); - Assert.assertEquals(outputCell, inputCell); - Assert.assertTrue(CellComparator.equals(inputCell, outputCell)); - } - Assert.assertEquals(rows.getInputs().size(), i + 1); - } finally { - DecoderFactory.checkIn(searcher); - } - } - - - @Test - public void testScanBackwards() throws IOException { - CellSearcher searcher = null; - try { - searcher = DecoderFactory.checkOut(block, true); - searcher.positionAfterLastCell(); - int i = -1; - while (searcher.previous()) { - ++i; - int oppositeIndex = rows.getInputs().size() - i - 1; - KeyValue inputKv = rows.getInputs().get(oppositeIndex); - KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); - Assert.assertEquals(inputKv, outputKv); - } - Assert.assertEquals(rows.getInputs().size(), i + 1); - } finally { - DecoderFactory.checkIn(searcher); - } - } - - - @Test - public void testRandomSeekHits() 
throws IOException { - CellSearcher searcher = null; - try { - searcher = DecoderFactory.checkOut(block, true); - for (KeyValue kv : rows.getInputs()) { - boolean hit = searcher.positionAt(kv); - Assert.assertTrue(hit); - Cell foundKv = searcher.current(); - Assert.assertTrue(CellComparator.equals(kv, foundKv)); - } - } finally { - DecoderFactory.checkIn(searcher); - } - } - - /** - * very hard to test nubs with this thing since the a nextRowKey function will usually skip them - */ - @Test - public void testRandomSeekMisses() throws IOException { - CellSearcher searcher = null; - List rowStartIndexes = rows.getRowStartIndexes(); - try { - searcher = DecoderFactory.checkOut(block, true); - for (int i=0; i < rows.getInputs().size(); ++i) { - KeyValue kv = rows.getInputs().get(i); - - //nextRow - KeyValue inputNextRow = KeyValueUtil.createFirstKeyInNextRow(kv); - - CellScannerPosition position = searcher.positionAtOrBefore(inputNextRow); - boolean isFirstInRow = rowStartIndexes.contains(i); - if(isFirstInRow){ - int rowIndex = rowStartIndexes.indexOf(i); - if(rowIndex < rowStartIndexes.size() - 1){ -// int lastKvInRowI = rowStartIndexes.get(rowIndex + 1) - 1; - Assert.assertEquals(CellScannerPosition.BEFORE, position); - /* - * Can't get this to work between nubs like rowB\x00 <-> rowBB - * - * No reason to doubt that it works, but will have to come up with a smarter test. - */ -// Assert.assertEquals(rows.getInputs().get(lastKvInRowI), searcher.getCurrentCell()); - } - } - - //previous KV - KeyValue inputPreviousKv = KeyValueUtil.previousKey(kv); - boolean hit = searcher.positionAt(inputPreviousKv); - Assert.assertFalse(hit); - position = searcher.positionAtOrAfter(inputPreviousKv); - if(CollectionUtils.isLastIndex(rows.getInputs(), i)){ - Assert.assertTrue(CellScannerPosition.AFTER_LAST == position); - }else{ - Assert.assertTrue(CellScannerPosition.AFTER == position); - /* - * TODO: why i+1 instead of i? - */ - Assert.assertEquals(rows.getInputs().get(i+1), searcher.current()); - } - } - } finally { - DecoderFactory.checkIn(searcher); - } - } - - - @Test - public void testRandomSeekIndividualAssertions() throws IOException { - CellSearcher searcher = null; - try { - searcher = DecoderFactory.checkOut(block, true); - rows.individualSearcherAssertions(searcher); - } finally { - DecoderFactory.checkIn(searcher); - } - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java deleted file mode 100644 index d3a2fa8..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row; - -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDeeper; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataEmpty; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNub; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSimple; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataTrivial; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrls; -import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; - -import com.google.common.collect.Lists; - -/* - * A master class for registering different implementations of TestRowData. - */ -public interface TestRowData { - - List getInputs(); - List getRowStartIndexes(); - - void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta); - - void individualSearcherAssertions(CellSearcher searcher); - - public static class InMemory { - - /* - * The following are different styles of data that the codec may encounter. Having these small - * representations of the data helps pinpoint what is wrong if the encoder breaks. - */ - public static Collection getAll() { - List all = Lists.newArrayList(); - //simple - all.add(new TestRowDataEmpty()); - all.add(new TestRowDataTrivial()); - all.add(new TestRowDataSimple()); - all.add(new TestRowDataDeeper()); - - //more specific - all.add(new TestRowDataSingleQualifier()); -// all.add(new TestRowDataMultiFamilies());//multiple families disabled in PrefixTreeEncoder - all.add(new TestRowDataNub()); - all.add(new TestRowDataSearcherRowMiss()); - all.add(new TestRowDataQualifierByteOrdering()); - all.add(new TestRowDataComplexQualifiers()); - all.add(new TestRowDataDifferentTimestamps()); - - //larger data volumes (hard to debug) - all.add(new TestRowDataNumberStrings()); - all.add(new TestRowDataUrls()); - all.add(new TestRowDataUrlsExample()); - all.add(new TestRowDataExerciseFInts()); - all.add(new TestRowDataRandomKeyValues()); - return all; - } - - public static Collection getAllAsObjectArray() { - List all = Lists.newArrayList(); - for (TestRowData testRows : getAll()) { - all.add(new Object[] { testRows }); - } - return all; - } - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java deleted file mode 100644 index b2e81b9..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.List; - -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; -import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -import com.google.common.collect.Lists; - -@RunWith(Parameterized.class) -public class TestRowEncoder { - - protected static int BLOCK_START = 7; - - @Parameters - public static Collection parameters() { - List parameters = Lists.newArrayList(); - for (TestRowData testRows : TestRowData.InMemory.getAll()) { - parameters.add(new Object[] { testRows }); - } - return parameters; - } - - protected TestRowData rows; - protected List inputKvs; - protected boolean includeMemstoreTS = true; - protected ByteArrayOutputStream os; - protected PrefixTreeEncoder encoder; - protected int totalBytes; - protected PrefixTreeBlockMeta blockMetaWriter; - protected byte[] outputBytes; - protected ByteBuffer buffer; - protected ByteArrayInputStream is; - protected PrefixTreeBlockMeta blockMetaReader; - protected byte[] inputBytes; - protected PrefixTreeArraySearcher searcher; - - public TestRowEncoder(TestRowData testRows) { - this.rows = testRows; - } - - @Before - public void compile() throws IOException { - os = new ByteArrayOutputStream(1 << 20); - encoder = new PrefixTreeEncoder(os, includeMemstoreTS); - - inputKvs = rows.getInputs(); - for (KeyValue kv : inputKvs) { - encoder.write(kv); - } - encoder.flush(); - totalBytes = encoder.getTotalBytes(); - blockMetaWriter = encoder.getBlockMeta(); - outputBytes = os.toByteArray(); - - // start reading, but save the assertions for @Test methods - buffer = ByteBuffer.wrap(outputBytes); - blockMetaReader = new PrefixTreeBlockMeta(buffer); - - searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(), - blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength()); - searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS); - } - - @Test - public void testEncoderOutput() throws IOException { - Assert.assertEquals(totalBytes, outputBytes.length); - Assert.assertEquals(blockMetaWriter, blockMetaReader); - } - - @Test - public void testForwardScanner() { - int counter = -1; - while (searcher.advance()) { - 
++counter; - KeyValue inputKv = rows.getInputs().get(counter); - KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); - assertKeyAndValueEqual(inputKv, outputKv); - } - // assert same number of cells - Assert.assertEquals(rows.getInputs().size(), counter + 1); - } - - - /** - * probably not needed since testReverseScannerWithJitter() below is more thorough - */ - @Test - public void testReverseScanner() { - searcher.positionAfterLastCell(); - int counter = -1; - while (searcher.previous()) { - ++counter; - int oppositeIndex = rows.getInputs().size() - counter - 1; - KeyValue inputKv = rows.getInputs().get(oppositeIndex); - KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); - assertKeyAndValueEqual(inputKv, outputKv); - } - Assert.assertEquals(rows.getInputs().size(), counter + 1); - } - - - /** - * Exercise the nubCellsRemain variable by calling next+previous. NubCellsRemain is basically - * a special fan index. - */ - @Test - public void testReverseScannerWithJitter() { - searcher.positionAfterLastCell(); - int counter = -1; - while (true) { - boolean foundCell = searcher.previous(); - if (!foundCell) { - break; - } - ++counter; - - // a next+previous should cancel out - if (!searcher.isAfterLast()) { - searcher.advance(); - searcher.previous(); - } - - int oppositeIndex = rows.getInputs().size() - counter - 1; - KeyValue inputKv = rows.getInputs().get(oppositeIndex); - KeyValue outputKv = KeyValueUtil.copyToNewKeyValue(searcher.current()); - assertKeyAndValueEqual(inputKv, outputKv); - } - Assert.assertEquals(rows.getInputs().size(), counter + 1); - } - - @Test - public void testIndividualBlockMetaAssertions() { - rows.individualBlockMetaAssertions(blockMetaReader); - } - - - /**************** helper **************************/ - - protected void assertKeyAndValueEqual(Cell expected, Cell actual) { - // assert keys are equal (doesn't compare values) - Assert.assertEquals(expected, actual); - if (includeMemstoreTS) { - Assert.assertEquals(expected.getMvccVersion(), actual.getMvccVersion()); - } - // assert values equal - Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(), - expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(), - actual.getValueLength())); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java deleted file mode 100644 index 8f9fa33..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataComplexQualifiers extends BaseTestRowData{ - - static byte[] - Arow = Bytes.toBytes("Arow"), - cf = PrefixTreeTestConstants.TEST_CF, - v0 = Bytes.toBytes("v0"); - - static List qualifiers = Lists.newArrayList(); - static { - List qualifierStrings = Lists.newArrayList(); - qualifierStrings.add("cq"); - qualifierStrings.add("cq0"); - qualifierStrings.add("cq1"); - qualifierStrings.add("cq2"); - qualifierStrings.add("dq0");// second root level fan - qualifierStrings.add("dq1");// nub - qualifierStrings.add("dq111");// leaf on nub - qualifierStrings.add("dq11111a");// leaf on leaf - for (String s : qualifierStrings) { - qualifiers.add(Bytes.toBytes(s)); - } - } - - static long ts = 55L; - - static List d = Lists.newArrayList(); - static { - for (byte[] qualifier : qualifiers) { - d.add(new KeyValue(Arow, cf, qualifier, ts, v0)); - } - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java deleted file mode 100644 index 4a80507..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; -import org.junit.Assert; - -import com.google.common.collect.Lists; - -/* - * Goes beyond a trivial trie to add a branch on the "cf" node - */ -public class TestRowDataDeeper extends BaseTestRowData{ - - static byte[] - cdc = Bytes.toBytes("cdc"), - cf6 = Bytes.toBytes("cf6"), - cfc = Bytes.toBytes("cfc"), - f = Bytes.toBytes("f"), - q = Bytes.toBytes("q"), - v = Bytes.toBytes("v"); - - static long - ts = 55L; - - static List d = Lists.newArrayList(); - static{ - d.add(new KeyValue(cdc, f, q, ts, v)); - d.add(new KeyValue(cf6, f, q, ts, v)); - d.add(new KeyValue(cfc, f, q, ts, v)); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { - //0: token:c; fan:d,f - //1: token:f; fan:6,c - //2: leaves - Assert.assertEquals(3, blockMeta.getRowTreeDepth()); - } - - @Override - public void individualSearcherAssertions(CellSearcher searcher) { - /** - * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only - * rA, but rB as well. - */ - KeyValue cfcRow = KeyValue.createFirstOnRow(Bytes.toBytes("cfc")); - CellScannerPosition position = searcher.positionAtOrAfter(cfcRow); - Assert.assertEquals(CellScannerPosition.AFTER, position); - Assert.assertEquals(d.get(2), searcher.current()); - searcher.previous(); - Assert.assertEquals(d.get(1), searcher.current()); - } -} - - diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java deleted file mode 100644 index 5ef79e6..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.junit.Assert; - -import com.google.common.collect.Lists; - -/* - * test different timestamps - */ -public class TestRowDataDifferentTimestamps extends BaseTestRowData{ - - static byte[] - Arow = Bytes.toBytes("Arow"), - Brow = Bytes.toBytes("Brow"), - cf = Bytes.toBytes("fammy"), - cq0 = Bytes.toBytes("cq0"), - cq1 = Bytes.toBytes("cq1"), - v0 = Bytes.toBytes("v0"); - - static List d = Lists.newArrayList(); - static{ - KeyValue kv0 = new KeyValue(Arow, cf, cq0, 0L, v0); - kv0.setMvccVersion(123456789L); - d.add(kv0); - - KeyValue kv1 = new KeyValue(Arow, cf, cq1, 1L, v0); - kv1.setMvccVersion(3L); - d.add(kv1); - - KeyValue kv2 = new KeyValue(Brow, cf, cq0, 12345678L, v0); - kv2.setMvccVersion(65537L); - d.add(kv2); - - //watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders -// d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0)); - KeyValue kv3 = new KeyValue(Brow, cf, cq1, Long.MAX_VALUE-1, v0); - kv3.setMvccVersion(1L); - d.add(kv3); - - KeyValue kv4 = new KeyValue(Brow, cf, cq1, 999999999, v0); - //don't set memstoreTS - d.add(kv4); - - KeyValue kv5 = new KeyValue(Brow, cf, cq1, 12345, v0); - kv5.setMvccVersion(0L); - d.add(kv5); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { - Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0); - Assert.assertEquals(12, blockMeta.getNumValueBytes()); - - Assert.assertFalse(blockMeta.isAllSameTimestamp()); - Assert.assertNotNull(blockMeta.getMinTimestamp()); - Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0); - Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0); - - Assert.assertFalse(blockMeta.isAllSameMvccVersion()); - Assert.assertNotNull(blockMeta.getMinMvccVersion()); - Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0); - Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java deleted file mode 100644 index ace18b3..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValue.Type; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataEmpty extends BaseTestRowData{ - - private static byte[] b = new byte[0]; - - static List d = Lists.newArrayList(); - static { - d.add(new KeyValue(b, b, b, 0L, Type.Put, b)); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java deleted file mode 100644 index d586884..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; -import org.junit.Assert; - -import com.google.common.collect.Lists; - -/* - * test different timestamps - * - * http://pastebin.com/7ks8kzJ2 - * http://pastebin.com/MPn03nsK - */ -public class TestRowDataExerciseFInts extends BaseTestRowData{ - - static List rows; - static{ - List rowStrings = new ArrayList(); - rowStrings.add("com.edsBlog/directoryAa/pageAaa"); - rowStrings.add("com.edsBlog/directoryAa/pageBbb"); - rowStrings.add("com.edsBlog/directoryAa/pageCcc"); - rowStrings.add("com.edsBlog/directoryAa/pageDdd"); - rowStrings.add("com.edsBlog/directoryBb/pageEee"); - rowStrings.add("com.edsBlog/directoryBb/pageFff"); - rowStrings.add("com.edsBlog/directoryBb/pageGgg"); - rowStrings.add("com.edsBlog/directoryBb/pageHhh"); - rowStrings.add("com.isabellasBlog/directoryAa/pageAaa"); - rowStrings.add("com.isabellasBlog/directoryAa/pageBbb"); - rowStrings.add("com.isabellasBlog/directoryAa/pageCcc"); - rowStrings.add("com.isabellasBlog/directoryAa/pageDdd"); - rowStrings.add("com.isabellasBlog/directoryBb/pageEee"); - rowStrings.add("com.isabellasBlog/directoryBb/pageFff"); - rowStrings.add("com.isabellasBlog/directoryBb/pageGgg"); - rowStrings.add("com.isabellasBlog/directoryBb/pageHhh"); - ByteRangeTreeSet ba = new ByteRangeTreeSet(); - for(String row : rowStrings){ - ba.add(new ByteRange(Bytes.toBytes(row))); - } - rows = ba.compile().getSortedRanges(); - } - - static List cols = Lists.newArrayList(); - static{ - cols.add("Chrome"); - cols.add("Chromeb"); - cols.add("Firefox"); - cols.add("InternetExplorer"); - cols.add("Opera"); - cols.add("Safari"); - cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier"); - cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); - } - - static long ts = 1234567890; - - static int MAX_VALUE = 50; - - static List kvs = Lists.newArrayList(); - static { - for (ByteRange row : rows) { - for (String col : cols) { - KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF, - Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE")); - kvs.add(kv); - } - } - } - - @Override - public List getInputs() { - return kvs; - } - - @Override - public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { - Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1); - Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java 
b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java deleted file mode 100644 index 00c0375..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataMultiFamilies extends BaseTestRowData{ - - static byte[] - rowA = Bytes.toBytes("rowA"), - rowB = Bytes.toBytes("rowB"), - famA = Bytes.toBytes("famA"), - famB = Bytes.toBytes("famB"), - famBB = Bytes.toBytes("famBB"), - q0 = Bytes.toBytes("q0"), - q1 = Bytes.toBytes("q1"),//start with a different character - vvv = Bytes.toBytes("vvv"); - - static long ts = 55L; - - static List d = Lists.newArrayList(); - static { - d.add(new KeyValue(rowA, famA, q0, ts, vvv)); - d.add(new KeyValue(rowA, famB, q1, ts, vvv)); - d.add(new KeyValue(rowA, famBB, q0, ts, vvv)); - d.add(new KeyValue(rowB, famA, q0, ts, vvv)); - d.add(new KeyValue(rowB, famA, q1, ts, vvv)); - d.add(new KeyValue(rowB, famB, q0, ts, vvv)); - d.add(new KeyValue(rowB, famBB, q0, ts, vvv)); - d.add(new KeyValue(rowB, famBB, q1, ts, vvv)); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java deleted file mode 100644 index b4250dd..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataNub extends BaseTestRowData{ - - static byte[] - rowA = Bytes.toBytes("rowA"), - rowB = Bytes.toBytes("rowB"),//nub - rowBB = Bytes.toBytes("rowBB"), - cf = PrefixTreeTestConstants.TEST_CF, - cq0 = Bytes.toBytes("cq0"), - cq1 = Bytes.toBytes("cq1"), - v0 = Bytes.toBytes("v0"); - - static long - ts = 55L; - - static List d = Lists.newArrayList(); - static{ - d.add(new KeyValue(rowA, cf, cq0, ts, v0)); - d.add(new KeyValue(rowA, cf, cq1, ts, v0)); - d.add(new KeyValue(rowB, cf, cq0, ts, v0)); - d.add(new KeyValue(rowB, cf, cq1, ts, v0)); - d.add(new KeyValue(rowBB, cf, cq0, ts, v0)); - d.add(new KeyValue(rowBB, cf, cq1, ts, v0)); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java deleted file mode 100644 index 37afc54..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.Collections; -import java.util.List; - -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValue.Type; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataNumberStrings extends BaseTestRowData{ - - static List d = Lists.newArrayList(); - static { - - /** - * Test a string-encoded list of numbers. 0, 1, 10, 11 will sort as 0, 1, 10, 11 if strings - *
    - * This helped catch a bug with reverse scanning where it was jumping from the last leaf cell to - * the previous nub. It should do 11->10, but it was incorrectly doing 11->1 - */ - List problematicSeries = Lists.newArrayList(0, 1, 10, 11);//sort this at the end - for(Integer i : problematicSeries){ -// for(int i=0; i < 13; ++i){ - byte[] row = Bytes.toBytes(""+i); - byte[] family = Bytes.toBytes("F"); - byte[] column = Bytes.toBytes("C"); - byte[] value = Bytes.toBytes("V"); - - d.add(new KeyValue(row, family, column, 0L, Type.Put, value)); - } - Collections.sort(d, new CellComparator()); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java deleted file mode 100644 index cacca64..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataQualifierByteOrdering extends BaseTestRowData{ - - static byte[] - Arow = Bytes.toBytes("Arow"), - Brow = Bytes.toBytes("Brow"), - Brow2 = Bytes.toBytes("Brow2"), - fam = Bytes.toBytes("HappyFam"), - cq0 = Bytes.toBytes("cq0"), - cq1 = Bytes.toBytes("cq1tail"),//make sure tail does not come back as liat - cq2 = Bytes.toBytes("cq2"), - v0 = Bytes.toBytes("v0"); - - static long ts = 55L; - - static List d = Lists.newArrayList(); - static { - d.add(new KeyValue(Arow, fam, cq0, ts, v0)); - d.add(new KeyValue(Arow, fam, cq1, ts, v0)); - d.add(new KeyValue(Brow, fam, cq0, ts, v0)); - d.add(new KeyValue(Brow, fam, cq2, ts, v0)); - d.add(new KeyValue(Brow2, fam, cq1, ts, v0)); - d.add(new KeyValue(Brow2, fam, cq2, ts, v0)); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java deleted file mode 100644 index 0fb7a5b..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataRandomKeyValues extends BaseTestRowData { - - static List d = Lists.newArrayList(); - static RedundantKVGenerator generator = new RedundantKVGenerator(); - static { - d = generator.generateTestKeyValues(1 << 10); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java deleted file mode 100644 index fd16a7b..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; -import org.junit.Assert; - -import com.google.common.collect.Lists; - -public class TestRowDataSearcherRowMiss extends BaseTestRowData{ - - static byte[] - //don't let the rows share any common prefix bytes - A = Bytes.toBytes("A"), - AA = Bytes.toBytes("AA"), - AAA = Bytes.toBytes("AAA"), - B = Bytes.toBytes("B"), - cf = Bytes.toBytes("fam"), - cq = Bytes.toBytes("cq0"), - v = Bytes.toBytes("v0"); - - static long - ts = 55L; - - static List d = Lists.newArrayList(); - static{ - d.add(new KeyValue(A, cf, cq, ts, v)); - d.add(new KeyValue(AA, cf, cq, ts, v)); - d.add(new KeyValue(AAA, cf, cq, ts, v)); - d.add(new KeyValue(B, cf, cq, ts, v)); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public void individualSearcherAssertions(CellSearcher searcher) { - assertRowOffsetsCorrect(); - - searcher.resetToBeforeFirstEntry(); - - //test first cell - searcher.advance(); - Cell first = searcher.current(); - Assert.assertTrue(CellComparator.equals(d.get(0), first)); - - //test first cell in second row - Assert.assertTrue(searcher.positionAt(d.get(1))); - Assert.assertTrue(CellComparator.equals(d.get(1), searcher.current())); - - testBetween1and2(searcher); - testBetween2and3(searcher); - } - - /************ private methods, call from above *******************/ - - private void assertRowOffsetsCorrect(){ - Assert.assertEquals(4, getRowStartIndexes().size()); - } - - private void testBetween1and2(CellSearcher searcher){ - CellScannerPosition p;//reuse - Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts-2, v); - - //test exact - Assert.assertFalse(searcher.positionAt(betweenAAndAAA)); - - //test atOrBefore - p = searcher.positionAtOrBefore(betweenAAndAAA); - Assert.assertEquals(CellScannerPosition.BEFORE, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(1))); - - //test atOrAfter - p = searcher.positionAtOrAfter(betweenAAndAAA); - Assert.assertEquals(CellScannerPosition.AFTER, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(2))); - } - - private void testBetween2and3(CellSearcher searcher){ - CellScannerPosition p;//reuse - Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts-2, v); - - //test exact - Assert.assertFalse(searcher.positionAt(betweenAAAndB)); - - //test atOrBefore - p = searcher.positionAtOrBefore(betweenAAAndB); - Assert.assertEquals(CellScannerPosition.BEFORE, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(2))); - - //test atOrAfter - p = searcher.positionAtOrAfter(betweenAAAndB); - Assert.assertEquals(CellScannerPosition.AFTER, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(3))); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java deleted file mode 100644 index 64c60ef..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.Cell; -import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.CollectionUtils; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; -import org.junit.Assert; - -import com.google.common.collect.Lists; - -public class TestRowDataSimple extends BaseTestRowData { - - static byte[] - // don't let the rows share any common prefix bytes - rowA = Bytes.toBytes("Arow"), - rowB = Bytes.toBytes("Brow"), cf = Bytes.toBytes("fam"), - cq0 = Bytes.toBytes("cq0"), - cq1 = Bytes.toBytes("cq1tail"),// make sure tail does not come back as liat - cq2 = Bytes.toBytes("dcq2"),// start with a different character - v0 = Bytes.toBytes("v0"); - - static long ts = 55L; - - static List d = Lists.newArrayList(); - static { - d.add(new KeyValue(rowA, cf, cq0, ts, v0)); - d.add(new KeyValue(rowA, cf, cq1, ts, v0)); - d.add(new KeyValue(rowA, cf, cq2, ts, v0)); - d.add(new KeyValue(rowB, cf, cq0, ts, v0)); - d.add(new KeyValue(rowB, cf, cq1, ts, v0)); - d.add(new KeyValue(rowB, cf, cq2, ts, v0)); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public void individualSearcherAssertions(CellSearcher searcher) { - CellScannerPosition p;// reuse - searcher.resetToBeforeFirstEntry(); - - // test first cell - searcher.advance(); - Cell first = searcher.current(); - Assert.assertTrue(CellComparator.equals(d.get(0), first)); - - // test first cell in second row - Assert.assertTrue(searcher.positionAt(d.get(3))); - Assert.assertTrue(CellComparator.equals(d.get(3), searcher.current())); - - Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0); - - // test exact - Assert.assertFalse(searcher.positionAt(between4And5)); - - // test atOrBefore - p = searcher.positionAtOrBefore(between4And5); - Assert.assertEquals(CellScannerPosition.BEFORE, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(4))); - - // test atOrAfter - p = searcher.positionAtOrAfter(between4And5); - Assert.assertEquals(CellScannerPosition.AFTER, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(5))); - - // test when key falls before first key in block - Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0); - Assert.assertFalse(searcher.positionAt(beforeFirst)); - p = searcher.positionAtOrBefore(beforeFirst); - Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p); - p = searcher.positionAtOrAfter(beforeFirst); - 
Assert.assertEquals(CellScannerPosition.AFTER, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), d.get(0))); - Assert.assertEquals(d.get(0), searcher.current()); - - // test when key falls after last key in block - Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0);// must be lower case z - Assert.assertFalse(searcher.positionAt(afterLast)); - p = searcher.positionAtOrAfter(afterLast); - Assert.assertEquals(CellScannerPosition.AFTER_LAST, p); - p = searcher.positionAtOrBefore(afterLast); - Assert.assertEquals(CellScannerPosition.BEFORE, p); - Assert.assertTrue(CellComparator.equals(searcher.current(), CollectionUtils.getLast(d))); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java deleted file mode 100644 index 21f6083..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -public class TestRowDataSingleQualifier extends BaseTestRowData{ - - static byte[] - rowA = Bytes.toBytes("rowA"), - rowB = Bytes.toBytes("rowB"), - cf = PrefixTreeTestConstants.TEST_CF, - cq0 = Bytes.toBytes("cq0"), - v0 = Bytes.toBytes("v0"); - - static long ts = 55L; - - static List d = Lists.newArrayList(); - static { - d.add(new KeyValue(rowA, cf, cq0, ts, v0)); - d.add(new KeyValue(rowB, cf, cq0, ts, v0)); - } - - @Override - public List getInputs() { - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java deleted file mode 100644 index 108d2d7..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.apache.hbase.codec.prefixtree.scanner.CellScannerPosition; -import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; -import org.junit.Assert; - -import com.google.common.collect.Lists; - -public class TestRowDataTrivial extends BaseTestRowData{ - - static byte[] - rA = Bytes.toBytes("rA"), - rB = Bytes.toBytes("rB"),//turn "r" into a branch for the Searcher tests - cf = Bytes.toBytes("fam"), - cq0 = Bytes.toBytes("q0"), - v0 = Bytes.toBytes("v0"); - - static long ts = 55L; - - static List d = Lists.newArrayList(); - static { - d.add(new KeyValue(rA, cf, cq0, ts, v0)); - d.add(new KeyValue(rB, cf, cq0, ts, v0)); - } - - @Override - public List getInputs() { - return d; - } - - @Override - public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { - // node[0] -> root[r] - // node[1] -> leaf[A], etc - Assert.assertEquals(2, blockMeta.getRowTreeDepth()); - } - - @Override - public void individualSearcherAssertions(CellSearcher searcher) { - /** - * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only rA, - * but rB as well. - */ - KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz")); - CellScannerPosition position = searcher.positionAtOrAfter(afterLast); - Assert.assertEquals(CellScannerPosition.AFTER_LAST, position); - Assert.assertNull(searcher.current()); - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java deleted file mode 100644 index 4d9b7a3..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.util.ByteRange; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; -import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; - -import com.google.common.collect.Lists; - -/* - * test different timestamps - * - * http://pastebin.com/7ks8kzJ2 - * http://pastebin.com/MPn03nsK - */ -public class TestRowDataUrls extends BaseTestRowData{ - - static List rows; - static{ - List rowStrings = new ArrayList(); - rowStrings.add("com.edsBlog/directoryAa/pageAaa"); - rowStrings.add("com.edsBlog/directoryAa/pageBbb"); - rowStrings.add("com.edsBlog/directoryAa/pageCcc"); - rowStrings.add("com.edsBlog/directoryAa/pageDdd"); - rowStrings.add("com.edsBlog/directoryBb/pageEee"); - rowStrings.add("com.edsBlog/directoryBb/pageFff"); - rowStrings.add("com.edsBlog/directoryBb/pageGgg"); - rowStrings.add("com.edsBlog/directoryBb/pageHhh"); - rowStrings.add("com.isabellasBlog/directoryAa/pageAaa"); - rowStrings.add("com.isabellasBlog/directoryAa/pageBbb"); - rowStrings.add("com.isabellasBlog/directoryAa/pageCcc"); - rowStrings.add("com.isabellasBlog/directoryAa/pageDdd"); - rowStrings.add("com.isabellasBlog/directoryBb/pageEee"); - rowStrings.add("com.isabellasBlog/directoryBb/pageFff"); - rowStrings.add("com.isabellasBlog/directoryBb/pageGgg"); - rowStrings.add("com.isabellasBlog/directoryBb/pageHhh"); - ByteRangeTreeSet ba = new ByteRangeTreeSet(); - for (String row : rowStrings) { - ba.add(new ByteRange(Bytes.toBytes(row))); - } - rows = ba.compile().getSortedRanges(); - } - - static List cols = Lists.newArrayList(); - static { - cols.add("Chrome"); - cols.add("Chromeb"); - cols.add("Firefox"); - cols.add("InternetExplorer"); - cols.add("Opera"); - cols.add("Safari"); - } - - static long ts = 1234567890; - - static int MAX_VALUE = 50; - - static List kvs = Lists.newArrayList(); - static { - for (ByteRange row : rows) { - for (String col : cols) { - KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF, - Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE")); - kvs.add(kv); - // System.out.println("TestRows5:"+kv); - } - } - } - - @Override - public List getInputs() { - return kvs; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java deleted file mode 100644 index d03bac9..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.row.data; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueTestUtil; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; -import org.apache.hbase.codec.prefixtree.encode.column.ColumnNodeWriter; -import org.apache.hbase.codec.prefixtree.encode.row.RowNodeWriter; -import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; -import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; - -import com.google.common.collect.Lists; - -/* - * test different timestamps - * - * http://pastebin.com/7ks8kzJ2 - * http://pastebin.com/MPn03nsK - */ -public class TestRowDataUrlsExample extends BaseTestRowData{ - - static String TENANT_ID = Integer.toString(95322); - static String APP_ID = Integer.toString(12); - static List URLS = Lists.newArrayList( - "com.dablog/2011/10/04/boating", - "com.dablog/2011/10/09/lasers", - "com.jamiesrecipes", //this nub helped find a bug - "com.jamiesrecipes/eggs"); - static String FAMILY = "hits"; - static List BROWSERS = Lists.newArrayList( - "Chrome", "IE8", "IE9beta");//, "Opera", "Safari"); - static long TIMESTAMP = 1234567890; - - static int MAX_VALUE = 50; - - static List kvs = Lists.newArrayList(); - static{ - for(String rowKey : URLS){ - for(String qualifier : BROWSERS){ - KeyValue kv = new KeyValue( - Bytes.toBytes(rowKey), - Bytes.toBytes(FAMILY), - Bytes.toBytes(qualifier), - TIMESTAMP, - KeyValue.Type.Put, - Bytes.toBytes("VvvV")); - kvs.add(kv); - } - } - } - - /** - * Used for generating docs. - */ - public static void main(String... 
args) throws IOException{ - System.out.println("-- inputs --"); - System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true)); - ByteArrayOutputStream os = new ByteArrayOutputStream(1<<20); - PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false); - - for(KeyValue kv : kvs){ - encoder.write(kv); - } - encoder.flush(); - - System.out.println("-- qualifier SortedPtBuilderNodes --"); - for(TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()){ - System.out.println(tokenizer); - } - for(TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()){ - System.out.println(tokenizerNode); - } - - System.out.println("-- qualifier PtColumnNodeWriters --"); - for(ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()){ - System.out.println(writer); - } - - System.out.println("-- rowKey SortedPtBuilderNodes --"); - for(TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()){ - System.out.println(tokenizerNode); - } - for(TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()){ - System.out.println(tokenizerNode); - } - - System.out.println("-- row PtRowNodeWriters --"); - for(RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()){ - System.out.println(writer); - } - for(RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()){ - System.out.println(writer); - } - - System.out.println("-- concatenated values --"); - System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray())); - } - - @Override - public List getInputs() { - return kvs; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java deleted file mode 100644 index 6db1a80..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.timestamp; - -import java.util.Collection; -import java.util.List; - -import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic; -import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers; -import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats; - -import com.google.common.collect.Lists; - -public interface TestTimestampData { - - List getInputs(); - long getMinimum(); - List getOutputs(); - - public static class InMemory { - public Collection getAllAsObjectArray() { - List all = Lists.newArrayList(); - all.add(new Object[] { new TestTimestampDataBasic() }); - all.add(new Object[] { new TestTimestampDataNumbers() }); - all.add(new Object[] { new TestTimestampDataRepeats() }); - return all; - } - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java deleted file mode 100644 index 183eba2..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.timestamp; - -import java.io.IOException; -import java.util.Collection; - -import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; -import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) -public class TestTimestampEncoder { - - @Parameters - public static Collection parameters() { - return new TestTimestampData.InMemory().getAllAsObjectArray(); - } - - private TestTimestampData timestamps; - private PrefixTreeBlockMeta blockMeta; - private LongEncoder encoder; - private byte[] bytes; - private TimestampDecoder decoder; - - public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException { - this.timestamps = testTimestamps; - this.blockMeta = new PrefixTreeBlockMeta(); - this.blockMeta.setNumMetaBytes(0); - this.blockMeta.setNumRowBytes(0); - this.blockMeta.setNumQualifierBytes(0); - this.encoder = new LongEncoder(); - for (Long ts : testTimestamps.getInputs()) { - encoder.add(ts); - } - encoder.compile(); - blockMeta.setTimestampFields(encoder); - bytes = encoder.getByteArray(); - decoder = new TimestampDecoder(); - decoder.initOnBlock(blockMeta, bytes); - } - - @Test - public void testCompressorMinimum() { - Assert.assertEquals(timestamps.getMinimum(), encoder.getMin()); - } - - @Test - public void testCompressorRoundTrip() { - long[] outputs = encoder.getSortedUniqueTimestamps(); - for (int i = 0; i < timestamps.getOutputs().size(); ++i) { - long input = timestamps.getOutputs().get(i); - long output = outputs[i]; - Assert.assertEquals(input, output); - } - } - - @Test - public void testReaderMinimum() { - Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0)); - } - - @Test - public void testReaderRoundTrip() { - for (int i = 0; i < timestamps.getOutputs().size(); ++i) { - long input = timestamps.getOutputs().get(i); - long output = decoder.getLong(i); - Assert.assertEquals(input, output); - } - } -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java deleted file mode 100644 index 5c0da94..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.codec.prefixtree.timestamp.data; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData; - -public class TestTimestampDataBasic implements TestTimestampData { - - @Override - public List<Long> getInputs() { - List<Long> d = new ArrayList<Long>(); - d.add(5L); - d.add(3L); - d.add(0L); - d.add(1L); - d.add(3L); - return d; - } - - @Override - public long getMinimum() { - return 0L; - } - - @Override - public List<Long> getOutputs() { - List<Long> d = new ArrayList<Long>(); - d.add(0L); - d.add(1L); - d.add(3L); - d.add(5L); - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java deleted file mode 100644 index 4c1cbd5..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.timestamp.data; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData; - -public class TestTimestampDataNumbers implements TestTimestampData { - - private int shift = 8; - - @Override - public List<Long> getInputs() { - List<Long> d = new ArrayList<Long>(); - d.add(5L << shift); - d.add(3L << shift); - d.add(7L << shift); - d.add(1L << shift); - d.add(3L << shift); - return d; - } - - @Override - public long getMinimum() { - return 1L << shift; - } - - @Override - public List<Long> getOutputs() { - List<Long> d = new ArrayList<Long>(); - d.add(1L << shift); - d.add(3L << shift); - d.add(5L << shift); - d.add(7L << shift); - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java deleted file mode 100644 index 985d584..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.codec.prefixtree.timestamp.data; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData; - -public class TestTimestampDataRepeats implements TestTimestampData { - - private static long t = 1234567890L; - - @Override - public List<Long> getInputs() { - List<Long> d = new ArrayList<Long>(); - d.add(t); - d.add(t); - d.add(t); - d.add(t); - d.add(t); - return d; - } - - @Override - public long getMinimum() { - return t; - } - - @Override - public List<Long> getOutputs() { - List<Long> d = new ArrayList<Long>(); - return d; - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java deleted file mode 100644 index 41bbb6a..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.bytes; - -import junit.framework.Assert; - -import org.apache.hadoop.hbase.util.ByteRange; -import org.junit.Test; - -public class TestByteRange { - - @Test - public void testConstructor() { - ByteRange b = new ByteRange(new byte[] { 0, 1, 2 }); - Assert.assertEquals(3, b.getLength()); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java deleted file mode 100644 index 8cf7bd9..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.comparator; - -import java.util.Comparator; - -import org.apache.hadoop.hbase.util.Bytes; - -public class ByteArrayComparator implements Comparator<byte[]> { - - @Override - public int compare(byte[] a, byte[] b) { - return Bytes.compareTo(a, b); - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java deleted file mode 100644 index 05f9c02..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.number; - -import java.text.DecimalFormat; - -public class NumberFormatter { - - public static String addCommas(final Number pValue) { - if (pValue == null) { - return null; - } - String format = "###,###,###,###,###,###,###,###.#####################"; - return new DecimalFormat(format).format(pValue);// biggest is 19 digits - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java deleted file mode 100644 index 57fd8f5..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hbase.util.number; - -import java.util.Random; - -public class RandomNumberUtils { - - public static long nextPositiveLong(Random random) { - while (true) { - long value = random.nextLong(); - if (value > 0) { - return value; - } - } - } - -} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java deleted file mode 100644 index 579af34..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.vint; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import org.junit.Assert; -import org.junit.Test; - -/********************** tests *************************/ - -public class TestFIntTool { - @Test - public void testLeadingZeros() { - Assert.assertEquals(64, Long.numberOfLeadingZeros(0)); - Assert.assertEquals(63, Long.numberOfLeadingZeros(1)); - Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE)); - Assert.assertEquals(0, Long.numberOfLeadingZeros(-1)); - Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE)); - Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1)); - } - - @Test - public void testMaxValueForNumBytes() { - Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1)); - Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2)); - Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3)); - Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7)); - } - - @Test - public void testNumBytes() { - Assert.assertEquals(1, UFIntTool.numBytes(0)); - Assert.assertEquals(1, UFIntTool.numBytes(1)); - Assert.assertEquals(1, UFIntTool.numBytes(255)); - Assert.assertEquals(2, UFIntTool.numBytes(256)); - Assert.assertEquals(2, UFIntTool.numBytes(65535)); - Assert.assertEquals(3, UFIntTool.numBytes(65536)); - Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL)); - Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L)); - Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE)); - Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE)); - Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1)); - } - - @Test - public void testGetBytes() { - Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0)); - Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1)); - Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255)); - Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256)); - Assert.assertArrayEquals(new byte[] { 1, 3 }, UFIntTool.getBytes(2, 256 + 3)); - Assert.assertArrayEquals(new byte[] { 1, -128 
}, UFIntTool.getBytes(2, 256 + 128)); - Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255)); - Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 }, - UFIntTool.getBytes(4, Integer.MAX_VALUE)); - Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, - UFIntTool.getBytes(8, Long.MAX_VALUE)); - } - - @Test - public void testFromBytes() { - Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 })); - Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 })); - Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 })); - Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 })); - Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 })); - Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 })); - Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 })); - Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 })); - Assert.assertEquals(Long.MAX_VALUE, - UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 })); - } - - @Test - public void testRoundTrips() { - long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL, - 0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE, - (long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE }; - for (int i = 0; i < values.length; ++i) { - Assert.assertEquals(values[i], UFIntTool.fromBytes(UFIntTool.getBytes(8, values[i]))); - } - } - - @Test - public void testWriteBytes() throws IOException {// copied from testGetBytes - Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0)); - Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1)); - Assert.assertArrayEquals(new byte[] { -1 }, bytesViaOutputStream(1, 255)); - Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 256)); - Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 256 + 3)); - Assert.assertArrayEquals(new byte[] { 1, -128 }, bytesViaOutputStream(2, 256 + 128)); - Assert.assertArrayEquals(new byte[] { 1, -1 }, bytesViaOutputStream(2, 256 + 255)); - Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 }, - bytesViaOutputStream(4, Integer.MAX_VALUE)); - Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, - bytesViaOutputStream(8, Long.MAX_VALUE)); - } - - private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - UFIntTool.writeBytes(outputWidth, value, os); - return os.toByteArray(); - } -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java deleted file mode 100644 index 1fc4064..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.vint; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.Random; - -import org.junit.Assert; -import org.junit.Test; - -public class TestVIntTool { - - @Test - public void testNumBytes() { - Assert.assertEquals(1, UVIntTool.numBytes(0)); - Assert.assertEquals(1, UVIntTool.numBytes(1)); - Assert.assertEquals(1, UVIntTool.numBytes(100)); - Assert.assertEquals(1, UVIntTool.numBytes(126)); - Assert.assertEquals(1, UVIntTool.numBytes(127)); - Assert.assertEquals(2, UVIntTool.numBytes(128)); - Assert.assertEquals(2, UVIntTool.numBytes(129)); - Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE)); - } - - @Test - public void testWriteBytes() throws IOException { - Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0)); - Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1)); - Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63)); - Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127)); - Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128)); - Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155)); - Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE)); - } - - private byte[] bytesViaOutputStream(int value) throws IOException { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - UVIntTool.writeBytes(value, os); - return os.toByteArray(); - } - - @Test - public void testToBytes() { - Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0)); - Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1)); - Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63)); - Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127)); - Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128)); - Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155)); - Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE)); - } - - @Test - public void testFromBytes() { - Assert.assertEquals(Integer.MAX_VALUE, UVIntTool.getInt(UVIntTool.MAX_VALUE_BYTES)); - } - - @Test - public void testRoundTrips() { - Random random = new Random(); - for (int i = 0; i < 10000; ++i) { - int value = random.nextInt(Integer.MAX_VALUE); - byte[] bytes = UVIntTool.getBytes(value); - int roundTripped = UVIntTool.getInt(bytes); - Assert.assertEquals(value, roundTripped); - } - } - - @Test - public void testInputStreams() throws IOException { - ByteArrayInputStream is; - is = new ByteArrayInputStream(new byte[] { 0 }); - Assert.assertEquals(0, UVIntTool.getInt(is)); - is = new ByteArrayInputStream(new byte[] { 5 }); - Assert.assertEquals(5, UVIntTool.getInt(is)); - is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 }); - Assert.assertEquals(155, UVIntTool.getInt(is)); - } - -} \ No newline at end of file diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java 
b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java deleted file mode 100644 index e294abd..0000000 --- a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hbase.util.vint; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.util.Random; - -import org.apache.hbase.util.number.RandomNumberUtils; -import org.junit.Assert; -import org.junit.Test; - -public class TestVLongTool { - - @Test - public void testNumBytes() { - Assert.assertEquals(1, UVLongTool.numBytes(0)); - Assert.assertEquals(1, UVLongTool.numBytes(1)); - Assert.assertEquals(1, UVLongTool.numBytes(100)); - Assert.assertEquals(1, UVLongTool.numBytes(126)); - Assert.assertEquals(1, UVLongTool.numBytes(127)); - Assert.assertEquals(2, UVLongTool.numBytes(128)); - Assert.assertEquals(2, UVLongTool.numBytes(129)); - Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE)); - } - - @Test - public void testToBytes() { - Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0)); - Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1)); - Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63)); - Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127)); - Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128)); - Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155)); - Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE)); - } - - @Test - public void testFromBytes() { - Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES)); - } - - @Test - public void testFromBytesOffset() { - Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES, 0)); - - long ms = 1318966363481L; -// System.out.println(ms); - byte[] bytes = UVLongTool.getBytes(ms); -// System.out.println(Arrays.toString(bytes)); - long roundTripped = UVLongTool.getLong(bytes, 0); - Assert.assertEquals(ms, roundTripped); - - int calculatedNumBytes = UVLongTool.numBytes(ms); - int actualNumBytes = bytes.length; - Assert.assertEquals(actualNumBytes, calculatedNumBytes); - - byte[] shiftedBytes = new byte[1000]; - int shift = 33; - System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length); - long shiftedRoundTrip = UVLongTool.getLong(shiftedBytes, shift); - Assert.assertEquals(ms, shiftedRoundTrip); - } - - @Test - public void testRoundTrips() { - Random random = new Random(); - for (int i = 0; i < 10000; ++i) { - long value = RandomNumberUtils.nextPositiveLong(random); - byte[] bytes = UVLongTool.getBytes(value); - long roundTripped = UVLongTool.getLong(bytes); - 
Assert.assertEquals(value, roundTripped); - int calculatedNumBytes = UVLongTool.numBytes(value); - int actualNumBytes = bytes.length; - Assert.assertEquals(actualNumBytes, calculatedNumBytes); - } - } - - @Test - public void testInputStreams() throws IOException { - ByteArrayInputStream is; - is = new ByteArrayInputStream(new byte[] { 0 }); - Assert.assertEquals(0, UVLongTool.getLong(is)); - is = new ByteArrayInputStream(new byte[] { 5 }); - Assert.assertEquals(5, UVLongTool.getLong(is)); - is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 }); - Assert.assertEquals(155, UVLongTool.getLong(is)); - } -} \ No newline at end of file