diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java index cba3d36..9cf055a 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java @@ -38,7 +38,10 @@ public enum DataBlockEncoding { // id 1 is reserved for the BITSET algorithm to be added later PREFIX(2, createEncoder("org.apache.hadoop.hbase.io.encoding.PrefixKeyDeltaEncoder")), DIFF(3, createEncoder("org.apache.hadoop.hbase.io.encoding.DiffKeyDeltaEncoder")), - FAST_DIFF(4, createEncoder("org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder")); + FAST_DIFF(4, createEncoder("org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder")), + // id 5 is reserved for the COPY_KEY algorithm for benchmarking + // COPY_KEY(5, createEncoder("org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder")), + PREFIX_TREE(6, createEncoder("org.apache.hbase.codec.prefixtree.PrefixTreeCodec")); private final short id; private final byte[] idInBytes; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java index 5dfe86b..dd7cce7 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hbase.util; +import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Collection; @@ -50,4 +52,14 @@ public class ByteRangeTool { return ranges; } + public static void write(OutputStream os, ByteRange byteRange) throws IOException { + os.write(byteRange.getBytes(), byteRange.getOffset(), byteRange.getLength()); + } + + public static void write(OutputStream os, ByteRange byteRange, int byteRangeInnerOffset) + throws IOException { + os.write(byteRange.getBytes(), byteRange.getOffset() + byteRangeInnerOffset, + byteRange.getLength() - byteRangeInnerOffset); + } + } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 015c9eb..7031bb2 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -27,8 +27,10 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.security.AccessController; import java.security.PrivilegedAction; +import java.util.Collection; import java.util.Comparator; import java.util.Iterator; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -43,6 +45,7 @@ import org.apache.hadoop.io.WritableUtils; import sun.misc.Unsafe; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; /** * Utility class that handles byte arrays, conversions to/from other types, @@ -1718,4 +1721,44 @@ public class Bytes { return out; } + public static boolean equals(List<byte[]> a, List<byte[]> b) { + if (a == null && b == null) { + return true; + } + if (a == null && b != null) { + return false; + } + if (a != null && b == null) { + return false; + } + if (a.size() != b.size()) { + return false; + } + for (int i = 0; i < a.size(); ++i) { + if (!Bytes.equals(a.get(i), b.get(i))) { + return false; + } + } + return true; +
} + + public static boolean isSorted(Collection<byte[]> arrays) { + byte[] previous = new byte[0]; + for (byte[] array : IterableUtils.nullSafe(arrays)) { + if (Bytes.compareTo(previous, array) > 0) { + return false; + } + previous = array; + } + return true; + } + + public static List<byte[]> getUtf8ByteArrays(List<String> strings) { + List<byte[]> byteArrays = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(strings)); + for (String s : IterableUtils.nullSafe(strings)) { + byteArrays.add(Bytes.toBytes(s)); + } + return byteArrays; + } + } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java index 51932e0..0e45455 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java @@ -28,12 +28,15 @@ import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.io.WritableUtils; +import com.google.common.primitives.Bytes; + /** * Generate list of key values which are very useful to test data block encoding * and compression. */ public class RedundantKVGenerator { // row settings + static byte[] DEFAULT_COMMON_PREFIX = new byte[0]; static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10; static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6; static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3; @@ -107,6 +110,7 @@ public class RedundantKVGenerator { ) { this.randomizer = randomizer; + this.commonPrefix = DEFAULT_COMMON_PREFIX; this.numberOfRowPrefixes = numberOfRowPrefixes; this.averagePrefixLength = averagePrefixLength; this.prefixLengthVariance = prefixLengthVariance; @@ -115,7 +119,7 @@ public class RedundantKVGenerator { this.numberOfRows = numberOfRows; this.chanceForSameQualifier = chanceForSameQualifier; - this.chanceForSimiliarQualifier = chanceForSimiliarQualifier; + this.chanceForSimilarQualifier = chanceForSimiliarQualifier; this.averageQualifierLength = averageQualifierLength; this.qualifierLengthVariance = qualifierLengthVariance; @@ -131,6 +135,7 @@ public class RedundantKVGenerator { private Random randomizer; // row settings + private byte[] commonPrefix;//global prefix before rowPrefixes private int numberOfRowPrefixes; private int averagePrefixLength = 6; private int prefixLengthVariance = 3; @@ -138,9 +143,12 @@ public class RedundantKVGenerator { private int suffixLengthVariance = 3; private int numberOfRows = 500; + //family + private byte[] family; + // qualifier private float chanceForSameQualifier = 0.5f; - private float chanceForSimiliarQualifier = 0.4f; + private float chanceForSimilarQualifier = 0.4f; private int averageQualifierLength = 9; private int qualifierLengthVariance = 3; @@ -161,7 +169,8 @@ public class RedundantKVGenerator { prefixLengthVariance; byte[] newPrefix = new byte[prefixLength]; randomizer.nextBytes(newPrefix); - prefixes.add(newPrefix); + byte[] newPrefixWithCommon = newPrefix; + prefixes.add(newPrefixWithCommon); } // generate rest of the row @@ -173,7 +182,8 @@ public class RedundantKVGenerator { int randomPrefix = randomizer.nextInt(prefixes.size()); byte[] row = new byte[prefixes.get(randomPrefix).length + suffixLength]; - rows.add(row); + byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row); + rows.add(rowWithCommonPrefix); } return rows; } @@ -188,20 +198,22 @@ public class RedundantKVGenerator { List<KeyValue> result = new ArrayList<KeyValue>(); List<byte[]> rows = generateRows(); - Map<Integer, List<byte[]>>
rowsToQualifier = - new HashMap<Integer, List<byte[]>>(); + Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<Integer, List<byte[]>>(); - byte[] family = new byte[columnFamilyLength]; - randomizer.nextBytes(family); + if(family==null){ + family = new byte[columnFamilyLength]; + randomizer.nextBytes(family); + } - long baseTimestamp = Math.abs(randomizer.nextLong()) / - baseTimestampDivide; + long baseTimestamp = Math.abs(randomizer.nextLong()) / baseTimestampDivide; byte[] value = new byte[valueLength]; for (int i = 0; i < howMany; ++i) { - long timestamp = baseTimestamp + randomizer.nextInt( - timestampDiffSize); + long timestamp = baseTimestamp; + if(timestampDiffSize > 0){ + timestamp += randomizer.nextInt(timestampDiffSize); + } Integer rowId = randomizer.nextInt(rows.size()); byte[] row = rows.get(rowId); @@ -209,13 +221,11 @@ public class RedundantKVGenerator { // occasionally completely different byte[] qualifier; float qualifierChance = randomizer.nextFloat(); - if (!rowsToQualifier.containsKey(rowId) || - qualifierChance > chanceForSameQualifier + - chanceForSimiliarQualifier) { + if (!rowsToQualifier.containsKey(rowId) + || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) { int qualifierLength = averageQualifierLength; - qualifierLength += - randomizer.nextInt(2 * qualifierLengthVariance + 1) - - qualifierLengthVariance; + qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1) + - qualifierLengthVariance; + qualifier = new byte[qualifierLength]; + randomizer.nextBytes(qualifier); @@ -227,8 +237,8 @@ public class RedundantKVGenerator { } else if (qualifierChance > chanceForSameQualifier) { // similar qualifier List<byte[]> previousQualifiers = rowsToQualifier.get(rowId); - byte[] originalQualifier = previousQualifiers.get( - randomizer.nextInt(previousQualifiers.size())); + byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers + .size())); qualifier = new byte[originalQualifier.length]; int commonPrefix = randomizer.nextInt(qualifier.length); @@ -241,8 +251,7 @@ public class RedundantKVGenerator { } else { // same qualifier List<byte[]> previousQualifiers = rowsToQualifier.get(rowId); - qualifier = previousQualifiers.get( - randomizer.nextInt(previousQualifiers.size())); + qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size())); } if (randomizer.nextFloat() < chanceForZeroValue) { @@ -286,5 +295,99 @@ public class RedundantKVGenerator { return result; } + + + /************************ get/set ***********************************/ + + public RedundantKVGenerator setCommonPrefix(byte[] prefix){ + this.commonPrefix = prefix; + return this; + } + + public RedundantKVGenerator setRandomizer(Random randomizer) { + this.randomizer = randomizer; + return this; + } + + public RedundantKVGenerator setNumberOfRowPrefixes(int numberOfRowPrefixes) { + this.numberOfRowPrefixes = numberOfRowPrefixes; + return this; + } + + public RedundantKVGenerator setAveragePrefixLength(int averagePrefixLength) { + this.averagePrefixLength = averagePrefixLength; + return this; + } + public RedundantKVGenerator setPrefixLengthVariance(int prefixLengthVariance) { + this.prefixLengthVariance = prefixLengthVariance; + return this; + } + + public RedundantKVGenerator setAverageSuffixLength(int averageSuffixLength) { + this.averageSuffixLength = averageSuffixLength; + return this; + } + + public RedundantKVGenerator setSuffixLengthVariance(int suffixLengthVariance) { + this.suffixLengthVariance = suffixLengthVariance; + return this; + } + + public RedundantKVGenerator setNumberOfRows(int
numberOfRows) { + this.numberOfRows = numberOfRows; + return this; + } + + public RedundantKVGenerator setChanceForSameQualifier(float chanceForSameQualifier) { + this.chanceForSameQualifier = chanceForSameQualifier; + return this; + } + + public RedundantKVGenerator setChanceForSimilarQualifier(float chanceForSimiliarQualifier) { + this.chanceForSimilarQualifier = chanceForSimiliarQualifier; + return this; + } + + public RedundantKVGenerator setAverageQualifierLength(int averageQualifierLength) { + this.averageQualifierLength = averageQualifierLength; + return this; + } + + public RedundantKVGenerator setQualifierLengthVariance(int qualifierLengthVariance) { + this.qualifierLengthVariance = qualifierLengthVariance; + return this; + } + + public RedundantKVGenerator setColumnFamilyLength(int columnFamilyLength) { + this.columnFamilyLength = columnFamilyLength; + return this; + } + + public RedundantKVGenerator setFamily(byte[] family) { + this.family = family; + this.columnFamilyLength = family.length; + return this; + } + + public RedundantKVGenerator setValueLength(int valueLength) { + this.valueLength = valueLength; + return this; + } + + public RedundantKVGenerator setChanceForZeroValue(float chanceForZeroValue) { + this.chanceForZeroValue = chanceForZeroValue; + return this; + } + + public RedundantKVGenerator setBaseTimestampDivide(int baseTimestampDivide) { + this.baseTimestampDivide = baseTimestampDivide; + return this; + } + + public RedundantKVGenerator setTimestampDiffSize(int timestampDiffSize) { + this.timestampDiffSize = timestampDiffSize; + return this; + } + } diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java index 19d6b54..ce9d063 100644 --- a/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java +++ b/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java @@ -79,22 +79,39 @@ public class CellComparator implements Comparator, Serializable{ /**************** equals ****************************/ public static boolean equals(Cell a, Cell b){ - if (!areKeyLengthsEqual(a, b)) { - return false; - } - //TODO compare byte[]'s in reverse since later bytes more likely to differ - return 0 == compareStatic(a, b); + return equalsRow(a, b) + && equalsFamily(a, b) + && equalsQualifier(a, b) + && equalsTimestamp(a, b) + && equalsType(a, b); } public static boolean equalsRow(Cell a, Cell b){ - if(!areRowLengthsEqual(a, b)){ - return false; - } - return 0 == Bytes.compareTo( + return Bytes.equals( a.getRowArray(), a.getRowOffset(), a.getRowLength(), b.getRowArray(), b.getRowOffset(), b.getRowLength()); } + public static boolean equalsFamily(Cell a, Cell b){ + return Bytes.equals( + a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), + b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); + } + + public static boolean equalsQualifier(Cell a, Cell b){ + return Bytes.equals( + a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), + b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); + } + + public static boolean equalsTimestamp(Cell a, Cell b){ + return a.getTimestamp() == b.getTimestamp(); + } + + public static boolean equalsType(Cell a, Cell b){ + return a.getTypeByte() == b.getTypeByte(); + } + /********************* hashCode ************************/ diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java 
index fcaf27e..6d46ec5 100644 --- a/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java +++ b/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java @@ -18,6 +18,8 @@ package org.apache.hbase.cell; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hbase.Cell; @@ -45,6 +47,6 @@ public interface CellOutputStream { * that can then be read from the implementation to be sent to disk, put in the block cache, or * sent over the network. */ - void flush(); + void flush() throws IOException; } diff --git a/hbase-prefix-tree/pom.xml b/hbase-prefix-tree/pom.xml new file mode 100644 index 0000000..845efdb --- /dev/null +++ b/hbase-prefix-tree/pom.xml @@ -0,0 +1,62 @@ +<?xml version="1.0"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>hbase</artifactId> + <groupId>org.apache.hbase</groupId> + <version>0.95-SNAPSHOT</version> + <relativePath>..</relativePath> + </parent> + + <artifactId>hbase-prefix-tree</artifactId> + <name>HBase - Prefix Tree</name> + <description>Prefix Tree Data Block Encoder</description> + + <build> + <plugins> + <plugin> + <artifactId>maven-surefire-plugin</artifactId> + <executions> + <execution> + <id>secondPartTestsExecution</id> + <phase>test</phase> + <goals> + <goal>test</goal> + </goals> + <configuration> + <skip>true</skip> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <dependencies> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-common</artifactId> + </dependency> + </dependencies> +</project> diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java new file mode 100644 index 0000000..a696121 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeBlockMeta.java @@ -0,0 +1,841 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder; +import org.apache.hbase.util.vint.UVIntTool; +import org.apache.hbase.util.vint.UVLongTool; + +/** + * Information about the block. Stored at the beginning of the byte[]. Contains things + * like minimum timestamp and width of FInts in the row tree. + * + * Most fields stored in VInts that get decoded on the first access of each new block.
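+ * + * The persisted fields below are written in a fixed order by writeVariableBytesToOutputStream and read + * back in the same order by readVariableBytesFromInputStream and readVariableBytesFromArray, so those + * methods must be kept in sync. The encoded sections that follow this meta section are laid out in the + * order row, family, qualifier, timestamp, mvccVersion, value (see the getAbsolute*Offset methods below).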
+ */ +@InterfaceAudience.Private +public class PrefixTreeBlockMeta { + + /******************* static fields ********************/ + + public static final int VERSION = 0; + + public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE;// hard-coded in KeyValue + + public static final int + NUM_LONGS = 2, + NUM_INTS = 22, + NUM_SHORTS = 0,//keyValueTypeWidth not persisted + NUM_SINGLE_BYTES = 2, + MAX_BYTES = Bytes.SIZEOF_LONG * NUM_LONGS + + Bytes.SIZEOF_SHORT * NUM_SHORTS + + Bytes.SIZEOF_INT * NUM_INTS + + NUM_SINGLE_BYTES; + + + /**************** transient fields *********************/ + + protected int arrayOffset; + protected int bufferOffset; + + + /**************** persisted fields **********************/ + + // PrefixTree version to allow future format modifications + protected int version; + protected int numMetaBytes; + protected int numKeyValueBytes; + protected boolean includesMvccVersion;//probably don't need this explicitly, but only 1 byte + + // split the byte[] into 6 sections for the different data types + protected int numRowBytes; + protected int numFamilyBytes; + protected int numQualifierBytes; + protected int numTimestampBytes; + protected int numMvccVersionBytes; + protected int numValueBytes; + + // number of bytes in each section of fixed width FInts + protected int nextNodeOffsetWidth; + protected int familyOffsetWidth; + protected int qualifierOffsetWidth; + protected int timestampIndexWidth; + protected int mvccVersionIndexWidth; + protected int valueOffsetWidth; + protected int valueLengthWidth; + + // used to pre-allocate structures for reading + protected int rowTreeDepth; + protected int maxRowLength; + protected int maxQualifierLength; + + // the timestamp from which the deltas are calculated + protected long minTimestamp; + protected int timestampDeltaWidth; + protected long minMvccVersion; + protected int mvccVersionDeltaWidth; + + protected boolean allSameType; + protected byte allTypes; + + protected int numUniqueRows; + protected int numUniqueFamilies; + protected int numUniqueQualifiers; + + + /***************** constructors ********************/ + + public PrefixTreeBlockMeta() { + } + + public PrefixTreeBlockMeta(InputStream is) throws IOException{ + this.version = VERSION; + this.arrayOffset = 0; + this.bufferOffset = 0; + readVariableBytesFromInputStream(is); + } + + /** + * @param buffer positioned at start of PtBlockMeta + */ + public PrefixTreeBlockMeta(ByteBuffer buffer) { + initOnBlock(buffer); + } + + public void initOnBlock(ByteBuffer buffer) { + arrayOffset = buffer.arrayOffset(); + bufferOffset = buffer.position(); + readVariableBytesFromArray(buffer.array(), arrayOffset + bufferOffset); + } + + + /**************** operate on each field **********************/ + + public int calculateNumMetaBytes(){ + int numBytes = 0; + numBytes += UVIntTool.numBytes(version); + numBytes += UVLongTool.numBytes(numMetaBytes); + numBytes += UVIntTool.numBytes(numKeyValueBytes); + ++numBytes;//os.write(getIncludesMvccVersion()); + + numBytes += UVIntTool.numBytes(numRowBytes); + numBytes += UVIntTool.numBytes(numFamilyBytes); + numBytes += UVIntTool.numBytes(numQualifierBytes); + numBytes += UVIntTool.numBytes(numTimestampBytes); + numBytes += UVIntTool.numBytes(numMvccVersionBytes); + numBytes += UVIntTool.numBytes(numValueBytes); + + numBytes += UVIntTool.numBytes(nextNodeOffsetWidth); + numBytes += UVIntTool.numBytes(familyOffsetWidth); + numBytes += UVIntTool.numBytes(qualifierOffsetWidth); + numBytes += UVIntTool.numBytes(timestampIndexWidth); + 
numBytes += UVIntTool.numBytes(mvccVersionIndexWidth); + numBytes += UVIntTool.numBytes(valueOffsetWidth); + numBytes += UVIntTool.numBytes(valueLengthWidth); + + numBytes += UVIntTool.numBytes(rowTreeDepth); + numBytes += UVIntTool.numBytes(maxRowLength); + numBytes += UVIntTool.numBytes(maxQualifierLength); + + numBytes += UVLongTool.numBytes(minTimestamp); + numBytes += UVIntTool.numBytes(timestampDeltaWidth); + numBytes += UVLongTool.numBytes(minMvccVersion); + numBytes += UVIntTool.numBytes(mvccVersionDeltaWidth); + ++numBytes;//os.write(getAllSameTypeByte()); + ++numBytes;//os.write(allTypes); + + numBytes += UVIntTool.numBytes(numUniqueRows); + numBytes += UVIntTool.numBytes(numUniqueFamilies); + numBytes += UVIntTool.numBytes(numUniqueQualifiers); + return numBytes; + } + + public void writeVariableBytesToOutputStream(OutputStream os) throws IOException{ + UVIntTool.writeBytes(version, os); + UVIntTool.writeBytes(numMetaBytes, os); + UVIntTool.writeBytes(numKeyValueBytes, os); + os.write(getIncludesMvccVersionByte()); + + UVIntTool.writeBytes(numRowBytes, os); + UVIntTool.writeBytes(numFamilyBytes, os); + UVIntTool.writeBytes(numQualifierBytes, os); + UVIntTool.writeBytes(numTimestampBytes, os); + UVIntTool.writeBytes(numMvccVersionBytes, os); + UVIntTool.writeBytes(numValueBytes, os); + + UVIntTool.writeBytes(nextNodeOffsetWidth, os); + UVIntTool.writeBytes(familyOffsetWidth, os); + UVIntTool.writeBytes(qualifierOffsetWidth, os); + UVIntTool.writeBytes(timestampIndexWidth, os); + UVIntTool.writeBytes(mvccVersionIndexWidth, os); + UVIntTool.writeBytes(valueOffsetWidth, os); + UVIntTool.writeBytes(valueLengthWidth, os); + + UVIntTool.writeBytes(rowTreeDepth, os); + UVIntTool.writeBytes(maxRowLength, os); + UVIntTool.writeBytes(maxQualifierLength, os); + + UVLongTool.writeBytes(minTimestamp, os); + UVIntTool.writeBytes(timestampDeltaWidth, os); + UVLongTool.writeBytes(minMvccVersion, os); + UVIntTool.writeBytes(mvccVersionDeltaWidth, os); + os.write(getAllSameTypeByte()); + os.write(allTypes); + + UVIntTool.writeBytes(numUniqueRows, os); + UVIntTool.writeBytes(numUniqueFamilies, os); + UVIntTool.writeBytes(numUniqueQualifiers, os); + } + + public void readVariableBytesFromInputStream(InputStream is) throws IOException{ + version = UVIntTool.getInt(is); + numMetaBytes = UVIntTool.getInt(is); + numKeyValueBytes = UVIntTool.getInt(is); + setIncludesMvccVersion((byte) is.read()); + + numRowBytes = UVIntTool.getInt(is); + numFamilyBytes = UVIntTool.getInt(is); + numQualifierBytes = UVIntTool.getInt(is); + numTimestampBytes = UVIntTool.getInt(is); + numMvccVersionBytes = UVIntTool.getInt(is); + numValueBytes = UVIntTool.getInt(is); + + nextNodeOffsetWidth = UVIntTool.getInt(is); + familyOffsetWidth = UVIntTool.getInt(is); + qualifierOffsetWidth = UVIntTool.getInt(is); + timestampIndexWidth = UVIntTool.getInt(is); + mvccVersionIndexWidth = UVIntTool.getInt(is); + valueOffsetWidth = UVIntTool.getInt(is); + valueLengthWidth = UVIntTool.getInt(is); + + rowTreeDepth = UVIntTool.getInt(is); + maxRowLength = UVIntTool.getInt(is); + maxQualifierLength = UVIntTool.getInt(is); + + minTimestamp = UVLongTool.getLong(is); + timestampDeltaWidth = UVIntTool.getInt(is); + minMvccVersion = UVLongTool.getLong(is); + mvccVersionDeltaWidth = UVIntTool.getInt(is); + + setAllSameType((byte) is.read()); + allTypes = (byte) is.read(); + + numUniqueRows = UVIntTool.getInt(is); + numUniqueFamilies = UVIntTool.getInt(is); + numUniqueQualifiers = UVIntTool.getInt(is); + } + + public void 
readVariableBytesFromArray(byte[] bytes, int offset) { + int position = offset; + + version = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(version); + numMetaBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numMetaBytes); + numKeyValueBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numKeyValueBytes); + setIncludesMvccVersion(bytes[position]); + ++position; + + numRowBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numRowBytes); + numFamilyBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numFamilyBytes); + numQualifierBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numQualifierBytes); + numTimestampBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numTimestampBytes); + numMvccVersionBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numMvccVersionBytes); + numValueBytes = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numValueBytes); + + nextNodeOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(nextNodeOffsetWidth); + familyOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(familyOffsetWidth); + qualifierOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(qualifierOffsetWidth); + timestampIndexWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(timestampIndexWidth); + mvccVersionIndexWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(mvccVersionIndexWidth); + valueOffsetWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(valueOffsetWidth); + valueLengthWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(valueLengthWidth); + + rowTreeDepth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(rowTreeDepth); + maxRowLength = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(maxRowLength); + maxQualifierLength = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(maxQualifierLength); + + minTimestamp = UVLongTool.getLong(bytes, position); + position += UVLongTool.numBytes(minTimestamp); + timestampDeltaWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(timestampDeltaWidth); + minMvccVersion = UVLongTool.getLong(bytes, position); + position += UVLongTool.numBytes(minMvccVersion); + mvccVersionDeltaWidth = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(mvccVersionDeltaWidth); + + setAllSameType(bytes[position]); + ++position; + allTypes = bytes[position]; + ++position; + + numUniqueRows = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numUniqueRows); + numUniqueFamilies = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numUniqueFamilies); + numUniqueQualifiers = UVIntTool.getInt(bytes, position); + position += UVIntTool.numBytes(numUniqueQualifiers); + } + + //TODO method that can read directly from ByteBuffer instead of InputStream + + + /*************** methods *************************/ + + public int getKeyValueTypeWidth() { + return allSameType ? 0 : 1; + } + + public byte getIncludesMvccVersionByte() { + return includesMvccVersion ? 
(byte) 1 : (byte) 0; + } + + public void setIncludesMvccVersion(byte includesMvccVersionByte) { + includesMvccVersion = includesMvccVersionByte != 0; + } + + public byte getAllSameTypeByte() { + return allSameType ? (byte) 1 : (byte) 0; + } + + public void setAllSameType(byte allSameTypeByte) { + allSameType = allSameTypeByte != 0; + } + + public boolean isAllSameTimestamp() { + return timestampIndexWidth == 0; + } + + public boolean isAllSameMvccVersion() { + return mvccVersionIndexWidth == 0; + } + + public void setTimestampFields(LongEncoder encoder){ + this.minTimestamp = encoder.getMin(); + this.timestampIndexWidth = encoder.getBytesPerIndex(); + this.timestampDeltaWidth = encoder.getBytesPerDelta(); + this.numTimestampBytes = encoder.getTotalCompressedBytes(); + } + + public void setMvccVersionFields(LongEncoder encoder){ + this.minMvccVersion = encoder.getMin(); + this.mvccVersionIndexWidth = encoder.getBytesPerIndex(); + this.mvccVersionDeltaWidth = encoder.getBytesPerDelta(); + this.numMvccVersionBytes = encoder.getTotalCompressedBytes(); + } + + + /*************** Object methods *************************/ + + /** + * Generated by Eclipse + */ + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PrefixTreeBlockMeta other = (PrefixTreeBlockMeta) obj; + if (allSameType != other.allSameType) + return false; + if (allTypes != other.allTypes) + return false; + if (arrayOffset != other.arrayOffset) + return false; + if (bufferOffset != other.bufferOffset) + return false; + if (valueLengthWidth != other.valueLengthWidth) + return false; + if (valueOffsetWidth != other.valueOffsetWidth) + return false; + if (familyOffsetWidth != other.familyOffsetWidth) + return false; + if (includesMvccVersion != other.includesMvccVersion) + return false; + if (maxQualifierLength != other.maxQualifierLength) + return false; + if (maxRowLength != other.maxRowLength) + return false; + if (mvccVersionDeltaWidth != other.mvccVersionDeltaWidth) + return false; + if (mvccVersionIndexWidth != other.mvccVersionIndexWidth) + return false; + if (minMvccVersion != other.minMvccVersion) + return false; + if (minTimestamp != other.minTimestamp) + return false; + if (nextNodeOffsetWidth != other.nextNodeOffsetWidth) + return false; + if (numValueBytes != other.numValueBytes) + return false; + if (numFamilyBytes != other.numFamilyBytes) + return false; + if (numMvccVersionBytes != other.numMvccVersionBytes) + return false; + if (numMetaBytes != other.numMetaBytes) + return false; + if (numQualifierBytes != other.numQualifierBytes) + return false; + if (numRowBytes != other.numRowBytes) + return false; + if (numTimestampBytes != other.numTimestampBytes) + return false; + if (numUniqueFamilies != other.numUniqueFamilies) + return false; + if (numUniqueQualifiers != other.numUniqueQualifiers) + return false; + if (numUniqueRows != other.numUniqueRows) + return false; + if (numKeyValueBytes != other.numKeyValueBytes) + return false; + if (qualifierOffsetWidth != other.qualifierOffsetWidth) + return false; + if (rowTreeDepth != other.rowTreeDepth) + return false; + if (timestampDeltaWidth != other.timestampDeltaWidth) + return false; + if (timestampIndexWidth != other.timestampIndexWidth) + return false; + if (version != other.version) + return false; + return true; + } + + /** + * Generated by Eclipse + */ + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * 
result + (allSameType ? 1231 : 1237); + result = prime * result + allTypes; + result = prime * result + arrayOffset; + result = prime * result + bufferOffset; + result = prime * result + valueLengthWidth; + result = prime * result + valueOffsetWidth; + result = prime * result + familyOffsetWidth; + result = prime * result + (includesMvccVersion ? 1231 : 1237); + result = prime * result + maxQualifierLength; + result = prime * result + maxRowLength; + result = prime * result + mvccVersionDeltaWidth; + result = prime * result + mvccVersionIndexWidth; + result = prime * result + (int) (minMvccVersion ^ (minMvccVersion >>> 32)); + result = prime * result + (int) (minTimestamp ^ (minTimestamp >>> 32)); + result = prime * result + nextNodeOffsetWidth; + result = prime * result + numValueBytes; + result = prime * result + numFamilyBytes; + result = prime * result + numMvccVersionBytes; + result = prime * result + numMetaBytes; + result = prime * result + numQualifierBytes; + result = prime * result + numRowBytes; + result = prime * result + numTimestampBytes; + result = prime * result + numUniqueFamilies; + result = prime * result + numUniqueQualifiers; + result = prime * result + numUniqueRows; + result = prime * result + numKeyValueBytes; + result = prime * result + qualifierOffsetWidth; + result = prime * result + rowTreeDepth; + result = prime * result + timestampDeltaWidth; + result = prime * result + timestampIndexWidth; + result = prime * result + version; + return result; + } + + /** + * Generated by Eclipse + */ + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("PtBlockMeta [arrayOffset="); + builder.append(arrayOffset); + builder.append(", bufferOffset="); + builder.append(bufferOffset); + builder.append(", version="); + builder.append(version); + builder.append(", numMetaBytes="); + builder.append(numMetaBytes); + builder.append(", numKeyValueBytes="); + builder.append(numKeyValueBytes); + builder.append(", includesMvccVersion="); + builder.append(includesMvccVersion); + builder.append(", numRowBytes="); + builder.append(numRowBytes); + builder.append(", numFamilyBytes="); + builder.append(numFamilyBytes); + builder.append(", numQualifierBytes="); + builder.append(numQualifierBytes); + builder.append(", numTimestampBytes="); + builder.append(numTimestampBytes); + builder.append(", numMvccVersionBytes="); + builder.append(numMvccVersionBytes); + builder.append(", numValueBytes="); + builder.append(numValueBytes); + builder.append(", nextNodeOffsetWidth="); + builder.append(nextNodeOffsetWidth); + builder.append(", familyOffsetWidth="); + builder.append(familyOffsetWidth); + builder.append(", qualifierOffsetWidth="); + builder.append(qualifierOffsetWidth); + builder.append(", timestampIndexWidth="); + builder.append(timestampIndexWidth); + builder.append(", mvccVersionIndexWidth="); + builder.append(mvccVersionIndexWidth); + builder.append(", valueOffsetWidth="); + builder.append(valueOffsetWidth); + builder.append(", valueLengthWidth="); + builder.append(valueLengthWidth); + builder.append(", rowTreeDepth="); + builder.append(rowTreeDepth); + builder.append(", maxRowLength="); + builder.append(maxRowLength); + builder.append(", maxQualifierLength="); + builder.append(maxQualifierLength); + builder.append(", minTimestamp="); + builder.append(minTimestamp); + builder.append(", timestampDeltaWidth="); + builder.append(timestampDeltaWidth); + builder.append(", minMvccVersion="); + builder.append(minMvccVersion); + builder.append(", 
mvccVersionDeltaWidth="); + builder.append(mvccVersionDeltaWidth); + builder.append(", allSameType="); + builder.append(allSameType); + builder.append(", allTypes="); + builder.append(allTypes); + builder.append(", numUniqueRows="); + builder.append(numUniqueRows); + builder.append(", numUniqueFamilies="); + builder.append(numUniqueFamilies); + builder.append(", numUniqueQualifiers="); + builder.append(numUniqueQualifiers); + builder.append("]"); + return builder.toString(); + } + + + /************** absolute getters *******************/ + + public int getAbsoluteMetaOffset() { + return arrayOffset + bufferOffset; + } + + public int getAbsoluteRowOffset() { + return getAbsoluteMetaOffset() + numMetaBytes; + } + + public int getAbsoluteFamilyOffset() { + return getAbsoluteRowOffset() + numRowBytes; + } + + public int getAbsoluteQualifierOffset() { + return getAbsoluteFamilyOffset() + numFamilyBytes; + } + + public int getAbsoluteTimestampOffset() { + return getAbsoluteQualifierOffset() + numQualifierBytes; + } + + public int getAbsoluteMvccVersionOffset() { + return getAbsoluteTimestampOffset() + numTimestampBytes; + } + + public int getAbsoluteValueOffset() { + return getAbsoluteMvccVersionOffset() + numMvccVersionBytes; + } + + + /*************** get/set ***************************/ + + public int getTimestampDeltaWidth() { + return timestampDeltaWidth; + } + + public void setTimestampDeltaWidth(int timestampDeltaWidth) { + this.timestampDeltaWidth = timestampDeltaWidth; + } + + public int getValueOffsetWidth() { + return valueOffsetWidth; + } + + public void setValueOffsetWidth(int dataOffsetWidth) { + this.valueOffsetWidth = dataOffsetWidth; + } + + public int getValueLengthWidth() { + return valueLengthWidth; + } + + public void setValueLengthWidth(int dataLengthWidth) { + this.valueLengthWidth = dataLengthWidth; + } + + public int getMaxRowLength() { + return maxRowLength; + } + + public void setMaxRowLength(int maxRowLength) { + this.maxRowLength = maxRowLength; + } + + public long getMinTimestamp() { + return minTimestamp; + } + + public void setMinTimestamp(long minTimestamp) { + this.minTimestamp = minTimestamp; + } + + public byte getAllTypes() { + return allTypes; + } + + public void setAllTypes(byte allTypes) { + this.allTypes = allTypes; + } + + public boolean isAllSameType() { + return allSameType; + } + + public void setAllSameType(boolean allSameType) { + this.allSameType = allSameType; + } + + public int getNextNodeOffsetWidth() { + return nextNodeOffsetWidth; + } + + public void setNextNodeOffsetWidth(int nextNodeOffsetWidth) { + this.nextNodeOffsetWidth = nextNodeOffsetWidth; + } + + public int getNumRowBytes() { + return numRowBytes; + } + + public void setNumRowBytes(int numRowBytes) { + this.numRowBytes = numRowBytes; + } + + public int getNumTimestampBytes() { + return numTimestampBytes; + } + + public void setNumTimestampBytes(int numTimestampBytes) { + this.numTimestampBytes = numTimestampBytes; + } + + public int getNumValueBytes() { + return numValueBytes; + } + + public void setNumValueBytes(int numValueBytes) { + this.numValueBytes = numValueBytes; + } + + public int getNumMetaBytes() { + return numMetaBytes; + } + + public void setNumMetaBytes(int numMetaBytes) { + this.numMetaBytes = numMetaBytes; + } + + public int getArrayOffset() { + return arrayOffset; + } + + public void setArrayOffset(int arrayOffset) { + this.arrayOffset = arrayOffset; + } + + public int getBufferOffset() { + return bufferOffset; + } + + public void setBufferOffset(int bufferOffset) { 
+ this.bufferOffset = bufferOffset; + } + + public int getNumKeyValueBytes() { + return numKeyValueBytes; + } + + public void setNumKeyValueBytes(int numKeyValueBytes) { + this.numKeyValueBytes = numKeyValueBytes; + } + + public int getRowTreeDepth() { + return rowTreeDepth; + } + + public void setRowTreeDepth(int rowTreeDepth) { + this.rowTreeDepth = rowTreeDepth; + } + + public int getNumMvccVersionBytes() { + return numMvccVersionBytes; + } + + public void setNumMvccVersionBytes(int numMvccVersionBytes) { + this.numMvccVersionBytes = numMvccVersionBytes; + } + + public int getMvccVersionDeltaWidth() { + return mvccVersionDeltaWidth; + } + + public void setMvccVersionDeltaWidth(int mvccVersionDeltaWidth) { + this.mvccVersionDeltaWidth = mvccVersionDeltaWidth; + } + + public long getMinMvccVersion() { + return minMvccVersion; + } + + public void setMinMvccVersion(long minMvccVersion) { + this.minMvccVersion = minMvccVersion; + } + + public int getNumFamilyBytes() { + return numFamilyBytes; + } + + public void setNumFamilyBytes(int numFamilyBytes) { + this.numFamilyBytes = numFamilyBytes; + } + + public int getFamilyOffsetWidth() { + return familyOffsetWidth; + } + + public void setFamilyOffsetWidth(int familyOffsetWidth) { + this.familyOffsetWidth = familyOffsetWidth; + } + + public int getNumUniqueRows() { + return numUniqueRows; + } + + public void setNumUniqueRows(int numUniqueRows) { + this.numUniqueRows = numUniqueRows; + } + + public int getNumUniqueFamilies() { + return numUniqueFamilies; + } + + public void setNumUniqueFamilies(int numUniqueFamilies) { + this.numUniqueFamilies = numUniqueFamilies; + } + + public int getNumUniqueQualifiers() { + return numUniqueQualifiers; + } + + public void setNumUniqueQualifiers(int numUniqueQualifiers) { + this.numUniqueQualifiers = numUniqueQualifiers; + } + + public int getNumQualifierBytes() { + return numQualifierBytes; + } + + public void setNumQualifierBytes(int numQualifierBytes) { + this.numQualifierBytes = numQualifierBytes; + } + + public int getQualifierOffsetWidth() { + return qualifierOffsetWidth; + } + + public void setQualifierOffsetWidth(int qualifierOffsetWidth) { + this.qualifierOffsetWidth = qualifierOffsetWidth; + } + + public int getMaxQualifierLength() { + return maxQualifierLength; + } + + public void setMaxQualifierLength(int maxQualifierLength) { + this.maxQualifierLength = maxQualifierLength; + } + + public int getTimestampIndexWidth() { + return timestampIndexWidth; + } + + public void setTimestampIndexWidth(int timestampIndexWidth) { + this.timestampIndexWidth = timestampIndexWidth; + } + + public int getMvccVersionIndexWidth() { + return mvccVersionIndexWidth; + } + + public void setMvccVersionIndexWidth(int mvccVersionIndexWidth) { + this.mvccVersionIndexWidth = mvccVersionIndexWidth; + } + + public int getVersion() { + return version; + } + + public void setVersion(int version) { + this.version = version; + } + + public boolean isIncludesMvccVersion() { + return includesMvccVersion; + } + + public void setIncludesMvccVersion(boolean includesMvccVersion) { + this.includesMvccVersion = includesMvccVersion; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java new file mode 100644 index 0000000..3c5349c --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeCodec.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KeyComparator; +import org.apache.hadoop.hbase.KeyValue.MetaKeyComparator; +import org.apache.hadoop.hbase.KeyValue.RootKeyComparator; +import org.apache.hadoop.hbase.KeyValueTool; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; +import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.io.RawComparator; +import org.apache.hbase.codec.prefixtree.decode.DecoderFactory; +import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; +import org.apache.hbase.codec.prefixtree.encode.EncoderFactory; +import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +/** + * This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or + * package changes. + *
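+ * A column family opts into this codec through its data block encoding setting; for example (the + * family name below is illustrative only): + * <pre> + * HColumnDescriptor family = new HColumnDescriptor("f"); + * family.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE); + * </pre>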

+ * PrefixTreeDataBlockEncoder implementation of DataBlockEncoder. This is the primary entry point + * for PrefixTree encoding and decoding. Encoding is delegated to instances of + * {@link PrefixTreeEncoder}, and decoding is delegated to instances of + * {@link org.apache.hbase.codec.prefixtree.scanner.CellSearcher}. Encoder and decoder instances are + * created and recycled by static PtEncoderFactory and PtDecoderFactory. + */ +@InterfaceAudience.Private +public class PrefixTreeCodec implements DataBlockEncoder{ + + /** + * no-arg constructor for reflection + */ + public PrefixTreeCodec() { + } + + /** + * Copied from BufferedDataBlockEncoder. Almost definitely can be improved, but i'm not familiar + * enough with the concept of the HFileBlockEncodingContext. + */ + @Override + public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion, + HFileBlockEncodingContext blkEncodingCtx) throws IOException { + if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) { + throw new IOException(this.getClass().getName() + " only accepts " + + HFileBlockDefaultEncodingContext.class.getName() + " as the " + "encoding context."); + } + + HFileBlockDefaultEncodingContext encodingCtx + = (HFileBlockDefaultEncodingContext) blkEncodingCtx; + encodingCtx.prepareEncoding(); + DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder(); + internalEncodeKeyValues(dataOut, in, includesMvccVersion); + + //do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE? + if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) { + encodingCtx.postEncoding(BlockType.ENCODED_DATA); + } else { + encodingCtx.postEncoding(BlockType.DATA); + } + } + + private void internalEncodeKeyValues(DataOutputStream encodedOutputStream, + ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException { + rawKeyValues.rewind(); + PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion); + + try{ + KeyValue kv; + while ((kv = KeyValueTool.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) { + builder.write(kv); + } + builder.flush(); + }finally{ + EncoderFactory.checkIn(builder); + } + } + + + @Override + public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion) + throws IOException { + return decodeKeyValues(source, 0, 0, includesMvccVersion); + } + + + /** + * I don't think this method is called during normal HBase operation, so efficiency is not + * important. + */ + @Override + public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength, + int skipLastBytes, boolean includesMvccVersion) throws IOException { + ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste + sourceAsBuffer.mark(); + PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer); + sourceAsBuffer.rewind(); + int numV1BytesWithHeader = allocateHeaderLength + blockMeta.getNumKeyValueBytes(); + byte[] keyValueBytesWithHeader = new byte[numV1BytesWithHeader]; + ByteBuffer result = ByteBuffer.wrap(keyValueBytesWithHeader); + result.rewind(); + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion); + while (searcher.next()) { + KeyValue currentCell = KeyValueTool.copyToNewKeyValue(searcher.getCurrent()); + // needs to be modified for DirectByteBuffers. 
no existing methods to + // write VLongs to byte[] + int offset = result.arrayOffset() + result.position(); + KeyValueTool.appendToByteArray(currentCell, result.array(), offset); + int keyValueLength = KeyValueTool.length(currentCell); + ByteBufferUtils.skip(result, keyValueLength); + offset += keyValueLength; + if (includesMvccVersion) { + ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion()); + } + } + result.position(result.limit());//make it appear as if we were appending + return result; + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Override + public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { + block.rewind(); + PrefixTreeArraySearcher searcher = null; + try { + //should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will + searcher = DecoderFactory.checkOut(block, true); + if (!searcher.positionAtFirstCell()) { + return null; + } + return KeyValueTool.copyKeyToNewByteBuffer(searcher.getCurrent()); + } finally { + DecoderFactory.checkIn(searcher); + } + } + + @Override + public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm, + DataBlockEncoding encoding, byte[] header) { + if(DataBlockEncoding.PREFIX_TREE != encoding){ + //i'm not sure why encoding is in the interface. Each encoder implementation should probably + //know it's encoding type + throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported"); + } + return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header); + } + + @Override + public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) { + return new HFileBlockDefaultDecodingContext(compressionAlgorithm); + } + + /** + * Is this the correct handling of an illegal comparator? How to prevent that from getting all + * the way to this point. + */ + @Override + public EncodedSeeker createSeeker(RawComparator comparator, boolean includesMvccVersion) { + if(! (comparator instanceof KeyComparator)){ + throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator"); + } + if(comparator instanceof MetaKeyComparator){ + throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with META " + +"table"); + } + if(comparator instanceof RootKeyComparator){ + throw new IllegalArgumentException("DataBlockEncoding.PREFIX_TREE not compatible with ROOT " + +"table"); + } + + return new PrefixTreeSeeker(includesMvccVersion); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java new file mode 100644 index 0000000..fe8d155 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/PrefixTreeSeeker.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTool; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.cell.CellTool; +import org.apache.hbase.codec.prefixtree.decode.DecoderFactory; +import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; + +/** + * These methods have the same definition as any implementation of the EncodedSeeker. + * + * In the future, the EncodedSeeker could be modified to work with the Cell interface directly. It + * currently returns a new KeyValue object each time getKeyValue is called. This is not horrible, + * but in order to create a new KeyValue object, we must first allocate a new byte[] and copy in + * the data from the PrefixTreeCell. It is somewhat heavyweight right now. + */ +@InterfaceAudience.Private +public class PrefixTreeSeeker implements EncodedSeeker { + + protected ByteBuffer block; + protected boolean includeMvccVersion; + protected PrefixTreeArraySearcher ptSearcher; + + public PrefixTreeSeeker(boolean includeMvccVersion) { + this.includeMvccVersion = includeMvccVersion; + } + + @Override + public void setCurrentBuffer(ByteBuffer fullBlockBuffer) { + block = fullBlockBuffer; + ptSearcher = DecoderFactory.checkOut(block, includeMvccVersion); + rewind(); + } + + /** + * Currently unused. + *

+ * TODO performance leak. should reuse the searchers. hbase does not currently have a hook where + * this can be called + */ + public void releaseCurrentSearcher(){ + DecoderFactory.checkIn(ptSearcher); + } + + + @Override + public ByteBuffer getKeyDeepCopy() { + return KeyValueTool.copyKeyToNewByteBuffer(ptSearcher.getCurrent()); + } + + + @Override + public ByteBuffer getValueShallowCopy() { + return CellTool.getValueBufferShallowCopy(ptSearcher.getCurrent()); + } + + /** + * currently must do deep copy into new array + */ + @Override + public ByteBuffer getKeyValueBuffer() { + return KeyValueTool.copyToNewByteBuffer(ptSearcher.getCurrent()); + } + + /** + * currently must do deep copy into new array + */ + @Override + public KeyValue getKeyValue() { + return KeyValueTool.copyToNewKeyValue(ptSearcher.getCurrent()); + } + + /** + * Currently unused. + *

+ * A nice, lightweight reference, though the underlying cell is transient. This method may return + * the same reference to the backing PrefixTreeCell repeatedly, while other implementations may + * return a different reference for each Cell. + *
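+ * Callers that need a copy that survives a subsequent next() or seek should therefore use + * getKeyValue() above, which deep-copies the current cell into a new KeyValue.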

+ * The goal will be to transition the upper layers of HBase, like Filters and KeyValueHeap, to use + * this method instead of the getKeyValue() methods above. + */ +// @Override + public Cell getCurrent() { + return ptSearcher.getCurrent(); + } + + @Override + public void rewind() { + ptSearcher.positionAtFirstCell(); + } + + @Override + public boolean next() { + return ptSearcher.next(); + } + +// @Override + public boolean advance() { + return ptSearcher.next(); + } + + + private static final boolean USE_POSITION_BEFORE = false; + + /** + * Seek forward only (should be called reseekToKeyInBlock?). + *

+ * If the exact key is found, look at the seekBefore variable and:
+ * - if true: go to the previous key
+ * - if false: stay on the exact key + *

+ * If the exact key is not found, then go to the previous key *if possible*, but remember to leave + * the scanner in a valid state if possible. + *
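+ * For example, if the block contains rows "a" and "c" and the seek key is "b", the seeker is left + * positioned on "a" and 1 is returned; an exact match with forceBeforeOnExactMatch=false returns 0.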

+ * @param keyOnlyBytes KeyValue format of a Cell's key at which to position the seeker + * @param offset offset into the keyOnlyBytes array + * @param length number of bytes of the keyOnlyBytes array to use + * @param forceBeforeOnExactMatch if an exact match is found and seekBefore=true, back up one Cell + * @return 0 if the seeker is on the exact key
+ * 1 if the seeker is not on the key for any reason, including seekBefore being true + */ + @Override + public int seekToKeyInBlock(byte[] keyOnlyBytes, int offset, int length, + boolean forceBeforeOnExactMatch) { + if (USE_POSITION_BEFORE) { + return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length, + forceBeforeOnExactMatch); + }else{ + return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length, + forceBeforeOnExactMatch); + } + } + + + + /* + * Support both of these options since the underlying PrefixTree supports both. Possibly + * expand the EncodedSeeker to utilize them both. + */ + + protected int seekToOrBeforeUsingPositionAtOrBefore(byte[] keyOnlyBytes, int offset, int length, + boolean forceBeforeOnExactMatch){ + // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell + KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); + + CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv); + + if(CellScannerPosition.AT == position){ + if (forceBeforeOnExactMatch) { + ptSearcher.previous(); + return 1; + } + return 0; + } + + return 1; + } + + + protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length, + boolean forceBeforeOnExactMatch){ + // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell + KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); + + //should probably switch this to use the seekForwardToOrBefore method + CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv); + + if(CellScannerPosition.AT == position){ + if (forceBeforeOnExactMatch) { + ptSearcher.previous(); + return 1; + } + return 0; + + } + + if(CellScannerPosition.AFTER == position){ + if(!ptSearcher.isBeforeFirst()){ + ptSearcher.previous(); + } + return 1; + } + + if(position == CellScannerPosition.AFTER_LAST){ + return 1; + } + + throw new RuntimeException("unexpected CellScannerPosition:"+position); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java new file mode 100644 index 0000000..9dcbe63 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/ArraySearcherPool.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode; + +import java.nio.ByteBuffer; +import java.util.Queue; +import java.util.concurrent.LinkedBlockingQueue; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Pools PrefixTreeArraySearcher objects. 
These can consist of hundreds or thousands of objects + * and 1 is needed for each HFile during a Get operation. With tens of thousands of Gets/second, + * reusing these searchers may save a lot of young gen collections. + * + * Alternative implementation would be a ByteBufferSearcherPool (not implemented yet). + */ +@InterfaceAudience.Private +public class ArraySearcherPool { + + protected Queue pool + = new LinkedBlockingQueue(); + + public PrefixTreeArraySearcher checkOut(ByteBuffer buffer, boolean includesMvccVersion) { + PrefixTreeArraySearcher searcher = pool.poll(); + searcher = DecoderFactory.ensureArraySearcherValid(buffer, searcher, includesMvccVersion); + return searcher; + } + + public void checkIn(PrefixTreeArraySearcher searcher) { + searcher.releaseBlockReference(); + pool.offer(searcher); + } + + @Override + public String toString() { + return ("poolSize:" + pool.size()); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java new file mode 100644 index 0000000..fc7a6ce --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/DecoderFactory.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +/** + * Static wrapper class for the ArraySearcherPool. + */ +@InterfaceAudience.Private +public class DecoderFactory { + + protected static ArraySearcherPool ARRAY_BLOCK_SEARCHER_POOL = new ArraySearcherPool(); + + //TODO will need a PrefixTreeSearcher on top of CellSearcher + public static PrefixTreeArraySearcher checkOut(final ByteBuffer buffer, + boolean includeMvccVersion) { + if (buffer.isDirect()) { + throw new IllegalArgumentException("DirectByteBuffers not supported yet"); + // TODO implement PtByteBufferBlockScanner + } + + PrefixTreeArraySearcher searcher = ARRAY_BLOCK_SEARCHER_POOL.checkOut(buffer, + includeMvccVersion); + return searcher; + } + + public static void checkIn(CellSearcher pSearcher) { + if (pSearcher == null) { + return; + } + if (! 
(pSearcher instanceof PrefixTreeArraySearcher)) { + throw new IllegalArgumentException("Cannot return "+pSearcher.getClass()+" to " + +DecoderFactory.class); + } + PrefixTreeArraySearcher searcher = (PrefixTreeArraySearcher) pSearcher; + ARRAY_BLOCK_SEARCHER_POOL.checkIn(searcher); + } + + + /**************************** helper ******************************/ + + public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer, + PrefixTreeArraySearcher searcher, boolean includeMvccVersion) { + if (searcher == null) { + PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer); + searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(), + blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength()); + searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); + return searcher; + } + + PrefixTreeBlockMeta blockMeta = searcher.getBlockMeta(); + blockMeta.initOnBlock(buffer); + if (!searcher.areBuffersBigEnough()) { + int maxRowTreeStackNodes = Math.max(blockMeta.getRowTreeDepth(), + searcher.getMaxRowTreeStackNodes()); + int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength()); + int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(), + searcher.getQualifierBufferLength()); + searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength, + qualifierBufferLength); + } + searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); + return searcher; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java new file mode 100644 index 0000000..4eaa066 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.scanner.ReversibleCellScanner; + +/** + * Methods for going backwards through a PrefixTree block. This class is split out on its own to + * simplify the Scanner superclass and Searcher subclass. 
+ */ +@InterfaceAudience.Private +public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner implements + ReversibleCellScanner { + + /***************** construct ******************************/ + + public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength) { + super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); + } + + + /***************** methods **********************************/ + + @Override + public boolean previous() { + if (afterLast) { + afterLast = false; + positionAtLastCell(); + return true; + } + if (beforeFirst) { + return false; + } + if (isFirstCellInRow()) { + previousRowInternal(); + if (beforeFirst) { + return false; + } + populateLastNonRowFields(); + return true; + } + populatePreviousNonRowFields(); + return true; + } + + @Override + public boolean previousRow(boolean endOfRow) { + previousRowInternal(); + if(beforeFirst){ + return false; + } + if(endOfRow){ + populateLastNonRowFields(); + }else{ + populateFirstNonRowFields(); + } + return true; + } + + private boolean previousRowInternal() { + if (beforeFirst) { + return false; + } + if (afterLast) { + positionAtLastRow(); + return true; + } + if (currentRowNode.hasOccurrences()) { + discardCurrentRowNode(false); + if(currentRowNode==null){ + return false; + } + } + while (!beforeFirst) { + if (isDirectlyAfterNub()) {//we are about to back up to the nub + currentRowNode.resetFanIndex();//sets it to -1, which is before the first leaf + nubCellsRemain = true;//this positions us on the nub + return true; + } + if (currentRowNode.hasPreviousFanNodes()) { + followPreviousFan(); + descendToLastRowFromCurrentPosition(); + } else {// keep going up the stack until we find previous fan positions + discardCurrentRowNode(false); + if(currentRowNode==null){ + return false; + } + } + if (currentRowNode.hasOccurrences()) {// escape clause + return true;// found some values + } + } + return false;// went past the beginning + } + + protected boolean isDirectlyAfterNub() { + return currentRowNode.isNub() && currentRowNode.getFanIndex()==0; + } + + protected void positionAtLastRow() { + reInitFirstNode(); + descendToLastRowFromCurrentPosition(); + } + + protected void descendToLastRowFromCurrentPosition() { + while (currentRowNode.hasChildren()) { + followLastFan(); + } + } + + protected void positionAtLastCell() { + positionAtLastRow(); + populateLastNonRowFields(); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java new file mode 100644 index 0000000..7e372ce --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java @@ -0,0 +1,495 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader; +import org.apache.hbase.codec.prefixtree.decode.row.RowNodeReader; +import org.apache.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder; +import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; +import org.apache.hbase.codec.prefixtree.scanner.CellScanner; + +/** + * Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and + * call get/set methods. + * + * This is an "Array" scanner to distinguish from a future "ByteBuffer" scanner. This + * implementation requires that the bytes be in a normal java byte[] for performance. The + * alternative ByteBuffer implementation would allow for accessing data in an off-heap ByteBuffer + * without copying the whole buffer on-heap. + */ +@InterfaceAudience.Private +public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanner { + + /***************** fields ********************************/ + + protected PrefixTreeBlockMeta blockMeta; + + protected boolean beforeFirst; + protected boolean afterLast; + + protected RowNodeReader[] rowNodes; + protected int rowNodeStackIndex; + + protected RowNodeReader currentRowNode; + protected ColumnReader familyReader; + protected ColumnReader qualifierReader; + protected TimestampDecoder timestampDecoder; + protected MvccVersionDecoder mvccVersionDecoder; + + protected boolean nubCellsRemain; + protected int currentCellIndex; + + + /*********************** construct ******************************/ + + // pass in blockMeta so we can initialize buffers big enough for all cells in the block + public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength) { + this.rowNodes = new RowNodeReader[rowTreeDepth]; + for (int i = 0; i < rowNodes.length; ++i) { + rowNodes[i] = new RowNodeReader(); + } + this.rowBuffer = new byte[rowBufferLength]; + this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH]; + this.familyReader = new ColumnReader(familyBuffer, true); + this.qualifierBuffer = new byte[qualifierBufferLength]; + this.qualifierReader = new ColumnReader(qualifierBuffer, false); + this.timestampDecoder = new TimestampDecoder(); + this.mvccVersionDecoder = new MvccVersionDecoder(); + } + + + /**************** init helpers ***************************************/ + + /** + * Call when first accessing a block. 
+ * @return entirely new scanner if false + */ + public boolean areBuffersBigEnough() { + if (rowNodes.length < blockMeta.getRowTreeDepth()) { + return false; + } + if (rowBuffer.length < blockMeta.getMaxRowLength()) { + return false; + } + if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) { + return false; + } + return true; + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, boolean includeMvccVersion) { + this.block = block; + this.blockMeta = blockMeta; + this.familyOffset = familyBuffer.length; + this.familyReader.initOnBlock(blockMeta, block); + this.qualifierOffset = qualifierBuffer.length; + this.qualifierReader.initOnBlock(blockMeta, block); + this.timestampDecoder.initOnBlock(blockMeta, block); + this.mvccVersionDecoder.initOnBlock(blockMeta, block); + this.includeMvccVersion = includeMvccVersion; + resetToBeforeFirstEntry(); + } + + @Override + public void resetToBeforeFirstEntry() { + beforeFirst = true; + afterLast = false; + rowNodeStackIndex = -1; + currentRowNode = null; + rowLength = 0; + familyOffset = familyBuffer.length; + familyLength = 0; + qualifierOffset = blockMeta.getMaxQualifierLength(); + qualifierLength = 0; + nubCellsRemain = false; + currentCellIndex = -1; + timestamp = -1L; + type = DEFAULT_TYPE; + absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized + valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length + } + + /** + * Call this before putting the scanner back into a pool so it doesn't hold the last used block + * in memory. + */ + public void releaseBlockReference(){ + block = null; + } + + + /********************** CellScanner **********************/ + + @Override + public PrefixTreeCell getCurrent() { + if(isOutOfBounds()){ + return null; + } + return this; + } + + + /******************* Object methods ************************/ + + /** + * Override PrefixTreeCell.toString() with a check to see if the current cell is valid. 
+ */ + @Override + public String toString() { + PrefixTreeCell currentCell = getCurrent(); + if(currentCell==null){ + return "null"; + } + return currentCell.getKeyValueString(); + } + + + /******************* advance ***************************/ + + public boolean positionAtFirstCell() { + reInitFirstNode(); + return next(); + } + + @Override + public boolean next() { + if (afterLast) { + return false; + } + if (!hasOccurrences()) { + resetToBeforeFirstEntry(); + } + if (beforeFirst || isLastCellInRow()) { + nextRow(); + if (afterLast) { + return false; + } + } else { + ++currentCellIndex; + } + + populateNonRowFields(currentCellIndex); + return true; + } + + + public boolean nextRow() { + nextRowInternal(); + if (afterLast) { + return false; + } + populateNonRowFields(currentCellIndex); + return true; + } + + + /** + * This method is safe to call when the scanner is not on a fully valid row node, as in the case + * of a row token miss in the Searcher + * @return true if we are positioned on a valid row, false if past end of block + */ + protected boolean nextRowInternal() { + if (afterLast) { + return false; + } + if (beforeFirst) { + initFirstNode(); + if (currentRowNode.hasOccurrences()) { + if (currentRowNode.isNub()) { + nubCellsRemain = true; + } + currentCellIndex = 0; + return true; + } + } + if (currentRowNode.isLeaf()) { + discardCurrentRowNode(true); + } + while (!afterLast) { + if (nubCellsRemain) { + nubCellsRemain = false; + } + if (currentRowNode.hasMoreFanNodes()) { + followNextFan(); + if (currentRowNode.hasOccurrences()) { + currentCellIndex = 0; + return true; + }// found some values + } else { + discardCurrentRowNode(true); + } + } + return false;// went past the end + } + + + /**************** secondary traversal methods ******************************/ + + protected void reInitFirstNode() { + resetToBeforeFirstEntry(); + initFirstNode(); + } + + protected void initFirstNode() { + int offsetIntoUnderlyingStructure = blockMeta.getAbsoluteRowOffset(); + rowNodeStackIndex = 0; + currentRowNode = rowNodes[0]; + currentRowNode.initOnBlock(blockMeta, block, offsetIntoUnderlyingStructure); + appendCurrentTokenToRowBuffer(); + beforeFirst = false; + } + + protected void followFirstFan() { + followFan(0); + } + + protected void followPreviousFan() { + int nextFanPosition = currentRowNode.getFanIndex() - 1; + followFan(nextFanPosition); + } + + protected void followCurrentFan() { + int currentFanPosition = currentRowNode.getFanIndex(); + followFan(currentFanPosition); + } + + protected void followNextFan() { + int nextFanPosition = currentRowNode.getFanIndex() + 1; + followFan(nextFanPosition); + } + + protected void followLastFan() { + followFan(currentRowNode.getLastFanIndex()); + } + + protected void followFan(int fanIndex) { + currentRowNode.setFanIndex(fanIndex); + appendToRowBuffer(currentRowNode.getFanByte(fanIndex)); + + int nextOffsetIntoUnderlyingStructure = currentRowNode.getOffset() + + currentRowNode.getNextNodeOffset(fanIndex, blockMeta); + ++rowNodeStackIndex; + + currentRowNode = rowNodes[rowNodeStackIndex]; + currentRowNode.initOnBlock(blockMeta, block, nextOffsetIntoUnderlyingStructure); + + //TODO getToken is spewing garbage + appendCurrentTokenToRowBuffer(); + if (currentRowNode.isNub()) { + nubCellsRemain = true; + } + currentCellIndex = 0; + } + + /** + * @param forwards which marker to set if we overflow + */ + protected void discardCurrentRowNode(boolean forwards) { + RowNodeReader rowNodeBeingPopped = currentRowNode; + --rowNodeStackIndex;// pop it off 
the stack + if (rowNodeStackIndex < 0) { + currentRowNode = null; + if (forwards) { + markAfterLast(); + } else { + markBeforeFirst(); + } + return; + } + popFromRowBuffer(rowNodeBeingPopped); + currentRowNode = rowNodes[rowNodeStackIndex]; + } + + protected void markBeforeFirst() { + beforeFirst = true; + afterLast = false; + currentRowNode = null; + } + + protected void markAfterLast() { + beforeFirst = false; + afterLast = true; + currentRowNode = null; + } + + + /***************** helper methods **************************/ + + protected void appendCurrentTokenToRowBuffer() { + System.arraycopy(block, currentRowNode.getTokenArrayOffset(), rowBuffer, rowLength, + currentRowNode.getTokenLength()); + rowLength += currentRowNode.getTokenLength(); + } + + protected void appendToRowBuffer(byte b) { + rowBuffer[rowLength] = b; + ++rowLength; + } + + protected void popFromRowBuffer(RowNodeReader rowNodeBeingPopped) { + rowLength -= rowNodeBeingPopped.getTokenLength(); + --rowLength; // pop the parent's fan byte + } + + protected boolean hasOccurrences() { + return currentRowNode != null && currentRowNode.hasOccurrences(); + } + + protected boolean isBranch() { + return currentRowNode != null && !currentRowNode.hasOccurrences() + && currentRowNode.hasChildren(); + } + + protected boolean isNub() { + return currentRowNode != null && currentRowNode.hasOccurrences() + && currentRowNode.hasChildren(); + } + + protected boolean isLeaf() { + return currentRowNode != null && currentRowNode.hasOccurrences() + && !currentRowNode.hasChildren(); + } + + //TODO expose this in a PrefixTreeScanner interface + public boolean isBeforeFirst(){ + return beforeFirst; + } + + public boolean isAfterLast(){ + return afterLast; + } + + protected boolean isOutOfBounds(){ + return beforeFirst || afterLast; + } + + protected boolean isFirstCellInRow() { + return currentCellIndex == 0; + } + + protected boolean isLastCellInRow() { + return currentCellIndex == currentRowNode.getLastCellIndex(); + } + + + /********************* fill in family/qualifier/ts/type/value ************/ + + protected int populateNonRowFieldsAndCompareTo(int cellNum, Cell key) { + populateNonRowFields(cellNum); + return CellComparator.compareStatic(this, key); + } + + protected void populateFirstNonRowFields() { + populateNonRowFields(0); + } + + protected void populatePreviousNonRowFields() { + populateNonRowFields(currentCellIndex - 1); + } + + protected void populateLastNonRowFields() { + populateNonRowFields(currentRowNode.getLastCellIndex()); + } + + protected void populateNonRowFields(int cellIndex) { + currentCellIndex = cellIndex; + populateFamily(); + populateQualifier(); + populateTimestamp(); + populateMvccVersion(); + populateType(); + populateValueOffsets(); + } + + protected void populateFamily() { + int familyTreeIndex = currentRowNode.getFamilyOffset(currentCellIndex, blockMeta); + familyOffset = familyReader.populateBuffer(familyTreeIndex).getColumnOffset(); + familyLength = familyReader.getColumnLength(); + } + + protected void populateQualifier() { + int qualifierTreeIndex = currentRowNode.getColumnOffset(currentCellIndex, blockMeta); + qualifierOffset = qualifierReader.populateBuffer(qualifierTreeIndex).getColumnOffset(); + qualifierLength = qualifierReader.getColumnLength(); + } + + protected void populateTimestamp() { + if (blockMeta.isAllSameTimestamp()) { + timestamp = blockMeta.getMinTimestamp(); + } else { + int timestampIndex = currentRowNode.getTimestampIndex(currentCellIndex, blockMeta); + timestamp = 
timestampDecoder.getLong(timestampIndex); + } + } + + protected void populateMvccVersion() { + if (blockMeta.isAllSameMvccVersion()) { + mvccVersion = blockMeta.getMinMvccVersion(); + } else { + int mvccVersionIndex = currentRowNode.getMvccVersionIndex(currentCellIndex, + blockMeta); + mvccVersion = mvccVersionDecoder.getMvccVersion(mvccVersionIndex); + } + } + + protected void populateType() { + int typeInt; + if (blockMeta.isAllSameType()) { + typeInt = blockMeta.getAllTypes(); + } else { + typeInt = currentRowNode.getType(currentCellIndex, blockMeta); + } + type = PrefixTreeCell.TYPES[typeInt]; + } + + protected void populateValueOffsets() { + int offsetIntoValueSection = currentRowNode.getValueOffset(currentCellIndex, blockMeta); + absoluteValueOffset = blockMeta.getAbsoluteValueOffset() + offsetIntoValueSection; + valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta); + } + + + /**************** getters ***************************/ + + public byte[] getTreeBytes() { + return block; + } + + public PrefixTreeBlockMeta getBlockMeta() { + return blockMeta; + } + + public int getMaxRowTreeStackNodes() { + return rowNodes.length; + } + + public int getRowBufferLength() { + return rowBuffer.length; + } + + public int getQualifierBufferLength() { + return qualifierBuffer.length; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java new file mode 100644 index 0000000..22d696b --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java @@ -0,0 +1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.cell.CellTool; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.primitives.UnsignedBytes; + +/** + * Searcher extends the capabilities of the Scanner + ReversibleScanner to add the ability to + * position itself on a requested Cell without scanning through cells before it. The PrefixTree is + * set up to be a Trie of rows, so finding a particular row is extremely cheap. + *

+ * Once it finds the row, it does a binary search through the cells inside the row, which is not as + * fast as the trie search, but faster than iterating through every cell like existing block formats + * do. For this reason, this implementation is targeted towards schemas where rows are narrow enough + * to have several or many per block, and where you are generally looking for the entire row or the + * first cell. It will still be fast for wide rows or point queries, but could be improved upon. + */ +@InterfaceAudience.Private +public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner implements + CellSearcher { + + /*************** construct ******************************/ + + public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength) { + super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); + } + + + /********************* CellSearcher methods *******************/ + + @Override + public boolean positionAt(Cell key) { + return CellScannerPosition.AT == positionAtOrAfter(key); + } + + @Override + public CellScannerPosition positionAtOrBefore(Cell key) { + reInitFirstNode(); + int fanIndex = -1; + + while(true){ + //detect row mismatch. break loop if mismatch + int currentNodeDepth = rowLength; + int rowTokenComparison = compareToCurrentToken(key); + if(rowTokenComparison != 0){ + return fixRowTokenMissReverse(rowTokenComparison); + } + + //exact row found, move on to qualifier & ts + if(rowMatchesAfterCurrentPosition(key)){ + return positionAtQualifierTimestamp(key, true); + } + + //detect dead end (no fan to descend into) + if(!currentRowNode.hasFan()){ + if(hasOccurrences()){//must be leaf or nub + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + }else{ + //TODO i don't think this case is exercised by any tests + return fixRowFanMissReverse(0); + } + } + + //keep hunting for the rest of the row + byte searchForByte = CellTool.getRowByte(key, currentNodeDepth); + fanIndex = currentRowNode.whichFanNode(searchForByte); + if(fanIndex < 0){//no matching row. return early + int insertionPoint = -fanIndex; + return fixRowFanMissReverse(insertionPoint); + } + //found a match, so dig deeper into the tree + followFan(fanIndex); + } + } + + /** + * Identical workflow as positionAtOrBefore, but split them to avoid having ~10 extra + * if-statements. Priority on readability and debugability. + */ + @Override + public CellScannerPosition positionAtOrAfter(Cell key) { + reInitFirstNode(); + int fanIndex = -1; + + while(true){ + //detect row mismatch. break loop if mismatch + int currentNodeDepth = rowLength; + int rowTokenComparison = compareToCurrentToken(key); + if(rowTokenComparison != 0){ + return fixRowTokenMissForward(rowTokenComparison); + } + + //exact row found, move on to qualifier & ts + if(rowMatchesAfterCurrentPosition(key)){ + return positionAtQualifierTimestamp(key, false); + } + + //detect dead end (no fan to descend into) + if(!currentRowNode.hasFan()){ + if(hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + }else{ + //TODO i don't think this case is exercised by any tests + return fixRowFanMissForward(0); + } + } + + //keep hunting for the rest of the row + byte searchForByte = CellTool.getRowByte(key, currentNodeDepth); + fanIndex = currentRowNode.whichFanNode(searchForByte); + if(fanIndex < 0){//no matching row. 
return early + int insertionPoint = -fanIndex; + return fixRowFanMissForward(insertionPoint); + } + //found a match, so dig deeper into the tree + followFan(fanIndex); + } + } + + @Override + public boolean seekForwardTo(Cell key) { + if(currentPositionIsAfter(key)){ + //our position is after the requested key, so can't do anything + return false; + } + return positionAt(key); + } + + @Override + public CellScannerPosition seekForwardToOrBefore(Cell key) { + //Do we even need this check or should upper layers avoid this situation. It's relatively + //expensive compared to the rest of the seek operation. + if(currentPositionIsAfter(key)){ + //our position is after the requested key, so can't do anything + return CellScannerPosition.AFTER; + } + + return positionAtOrBefore(key); + } + + @Override + public CellScannerPosition seekForwardToOrAfter(Cell key) { + //Do we even need this check or should upper layers avoid this situation. It's relatively + //expensive compared to the rest of the seek operation. + if(currentPositionIsAfter(key)){ + //our position is after the requested key, so can't do anything + return CellScannerPosition.AFTER; + } + + return positionAtOrAfter(key); + } + + /** + * The content of the buffers doesn't matter here, only that afterLast=true and beforeFirst=false + */ + @Override + public void positionAfterLastCell() { + resetToBeforeFirstEntry(); + beforeFirst = false; + afterLast = true; + } + + + /****************** internal methods ************************/ + + protected boolean currentPositionIsAfter(Cell cell){ + return compareTo(cell) > 0; + } + + protected CellScannerPosition positionAtQualifierTimestamp(Cell key, boolean beforeOnMiss) { + int minIndex = 0; + int maxIndex = currentRowNode.getLastCellIndex(); + int diff; + while (true) { + int midIndex = (maxIndex + minIndex) / 2;//don't worry about overflow + diff = populateNonRowFieldsAndCompareTo(midIndex, key); + + if (diff == 0) {// found exact match + return CellScannerPosition.AT; + } else if (minIndex == maxIndex) {// even termination case + break; + } else if ((minIndex + 1) == maxIndex) {// odd termination case + diff = populateNonRowFieldsAndCompareTo(maxIndex, key); + if(diff > 0){ + diff = populateNonRowFieldsAndCompareTo(minIndex, key); + } + break; + } else if (diff < 0) {// keep going forward + minIndex = currentCellIndex; + } else {// went past it, back up + maxIndex = currentCellIndex; + } + } + + if (diff == 0) { + return CellScannerPosition.AT; + + } else if (diff < 0) {// we are before key + if (beforeOnMiss) { + return CellScannerPosition.BEFORE; + } + if (next()) { + return CellScannerPosition.AFTER; + } + return CellScannerPosition.AFTER_LAST; + + } else {// we are after key + if (!beforeOnMiss) { + return CellScannerPosition.AFTER; + } + if (previous()) { + return CellScannerPosition.BEFORE; + } + return CellScannerPosition.BEFORE_FIRST; + } + } + + /** + * compare this.row to key.row but starting at the current rowLength + * @param key Cell being searched for + * @return true if row buffer contents match key.row + */ + protected boolean rowMatchesAfterCurrentPosition(Cell key) { + if (!currentRowNode.hasOccurrences()) { + return false; + } + int thatRowLength = key.getRowLength(); + if (rowLength != thatRowLength) { + return false; + } + return true; + } + + // TODO move part of this to Cell comparator? 
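// Reviewer sketch (not part of this patch): the positioning methods above are expected to be
// reached through DecoderFactory, roughly as follows, assuming the caller supplies the block
// ByteBuffer and the search key Cell from the read path:
//
//   PrefixTreeArraySearcher searcher = DecoderFactory.checkOut(blockBuffer, true);
//   try {
//     CellScannerPosition pos = searcher.seekForwardToOrAfter(key);
//     if (pos != CellScannerPosition.AFTER_LAST) {
//       Cell current = searcher.getCurrent(); // the reused PrefixTreeCell, no per-cell allocation
//     }
//   } finally {
//     DecoderFactory.checkIn(searcher); // returns the searcher to the ArraySearcherPool
//   }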
+ /** + * Compare only the bytes within the window of the current token + * @param key + * @return return -1 if key is lessThan (before) this, 0 if equal, and 1 if key is after + */ + protected int compareToCurrentToken(Cell key) { + int startIndex = rowLength - currentRowNode.getTokenLength(); + int endIndexExclusive = startIndex + currentRowNode.getTokenLength(); + for (int i = startIndex; i < endIndexExclusive; ++i) { + if (i >= key.getRowLength()) {// key was shorter, so it's first + return -1; + } + byte keyByte = CellTool.getRowByte(key, i); + byte thisByte = rowBuffer[i]; + if (keyByte == thisByte) { + continue; + } + return UnsignedBytes.compare(keyByte, thisByte); + } + return 0; + } + + protected void followLastFansUntilExhausted(){ + while(currentRowNode.hasFan()){ + followLastFan(); + } + } + + + /****************** complete seek when token mismatch ******************/ + + /** + * @param searcherIsAfterInputKey <0: input key is before the searcher's position
+ * >0: input key is after the searcher's position + */ + protected CellScannerPosition fixRowTokenMissReverse(int searcherIsAfterInputKey) { + if (searcherIsAfterInputKey < 0) {//searcher position is after the input key, so back up + boolean foundPreviousRow = previousRow(true); + if(foundPreviousRow){ + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + }else{ + return CellScannerPosition.BEFORE_FIRST; + } + + }else{//searcher position is before the input key + if(currentRowNode.hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.BEFORE; + } + boolean foundNextRow = nextRow(); + if(foundNextRow){ + return CellScannerPosition.AFTER; + }else{ + return CellScannerPosition.AFTER_LAST; + } + } + } + + /** + * @param searcherIsAfterInputKey <0: input key is before the searcher's position
+ * >0: input key is after the searcher's position + */ + protected CellScannerPosition fixRowTokenMissForward(int searcherIsAfterInputKey) { + if (searcherIsAfterInputKey < 0) {//searcher position is after the input key + if(currentRowNode.hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + } + boolean foundNextRow = nextRow(); + if(foundNextRow){ + return CellScannerPosition.AFTER; + }else{ + return CellScannerPosition.AFTER_LAST; + } + + }else{//searcher position is before the input key, so go forward + discardCurrentRowNode(true); + boolean foundNextRow = nextRow(); + if(foundNextRow){ + return CellScannerPosition.AFTER; + }else{ + return CellScannerPosition.AFTER_LAST; + } + } + } + + + /****************** complete seek when fan mismatch ******************/ + + protected CellScannerPosition fixRowFanMissReverse(int fanInsertionPoint){ + if(fanInsertionPoint == 0){//we need to back up a row + boolean foundPreviousRow = previousRow(true);//true -> position on last cell in row + if(foundPreviousRow){ + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + } + return CellScannerPosition.BEFORE_FIRST; + } + + //follow the previous fan, but then descend recursively forward + followFan(fanInsertionPoint - 1); + followLastFansUntilExhausted(); + populateLastNonRowFields(); + return CellScannerPosition.BEFORE; + } + + protected CellScannerPosition fixRowFanMissForward(int fanInsertionPoint){ + if(fanInsertionPoint >= currentRowNode.getFanOut()){ + discardCurrentRowNode(true); + if (!nextRow()) { + return CellScannerPosition.AFTER_LAST; + } else { + return CellScannerPosition.AFTER; + } + } + + followFan(fanInsertionPoint); + if(hasOccurrences()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + } + + if(nextRowInternal()){ + populateFirstNonRowFields(); + return CellScannerPosition.AFTER; + + }else{ + return CellScannerPosition.AFTER_LAST; + } + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java new file mode 100644 index 0000000..d34014b --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/PrefixTreeCell.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.decode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTool; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellComparator; + +/** + * As the PrefixTreeArrayScanner moves through the tree bytes, it changes the values in the fields + * of this class so that Cell logic can be applied, but without allocating new memory for every Cell + * iterated through. + */ +@InterfaceAudience.Private +public class PrefixTreeCell implements Cell, Comparable { + + /********************** static **********************/ + + public static final KeyValue.Type[] TYPES = new KeyValue.Type[256]; + static { + for (KeyValue.Type type : KeyValue.Type.values()) { + TYPES[type.getCode() & 0xff] = type; + } + } + + //Same as KeyValue constructor. Only used to avoid NPE's when full cell hasn't been initialized. + public static final KeyValue.Type DEFAULT_TYPE = KeyValue.Type.Put; + + /******************** fields ************************/ + + protected byte[] block; + //we could also avoid setting the mvccVersion in the scanner/searcher, but this is simpler + protected boolean includeMvccVersion; + + protected byte[] rowBuffer; + protected int rowLength; + + protected byte[] familyBuffer; + protected int familyOffset; + protected int familyLength; + + protected byte[] qualifierBuffer;// aligned to the end of the array + protected int qualifierOffset; + protected int qualifierLength; + + protected Long timestamp; + protected Long mvccVersion; + + protected KeyValue.Type type; + + protected int absoluteValueOffset; + protected int valueLength; + + + /********************** Cell methods ******************/ + + /** + * For debugging. Currently creates new KeyValue to utilize its toString() method. 
+ */ + @Override + public String toString() { + return getKeyValueString(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof Cell)) { + return false; + } + //Temporary hack to maintain backwards compatibility with KeyValue.equals + return CellComparator.equalsIgnoreMvccVersion(this, (Cell)obj); + + //TODO return CellComparator.equals(this, (Cell)obj);//see HBASE-6907 + } + + @Override + public int hashCode(){ + //Temporary hack to maintain backwards compatibility with KeyValue.hashCode + //I don't think this is used in any hot code paths + return KeyValueTool.copyToNewKeyValue(this).hashCode(); + + //TODO return CellComparator.hashCode(this);//see HBASE-6907 + } + + @Override + public int compareTo(Cell other) { + return CellComparator.compareStatic(this, other); + } + + @Override + public long getTimestamp() { + return timestamp; + } + + @Override + public long getMvccVersion() { + if (!includeMvccVersion) { + return 0L; + } + return mvccVersion; + } + + @Override + public int getValueLength() { + return valueLength; + } + + @Override + public byte[] getRowArray() { + return rowBuffer; + } + + @Override + public int getRowOffset() { + return 0; + } + + @Override + public short getRowLength() { + return (short) rowLength; + } + + @Override + public byte[] getFamilyArray() { + return familyBuffer; + } + + @Override + public int getFamilyOffset() { + return familyOffset; + } + + @Override + public byte getFamilyLength() { + return (byte) familyLength; + } + + @Override + public byte[] getQualifierArray() { + return qualifierBuffer; + } + + @Override + public int getQualifierOffset() { + return qualifierOffset; + } + + @Override + public int getQualifierLength() { + return qualifierLength; + } + + @Override + public byte[] getValueArray() { + return block; + } + + @Override + public int getValueOffset() { + return absoluteValueOffset; + } + + @Override + public byte getTypeByte() { + return type.getCode(); + } + + + /************************* helper methods *************************/ + + /** + * Need this separate method so we can call it from subclasses' toString() methods + */ + protected String getKeyValueString(){ + KeyValue kv = KeyValueTool.copyToNewKeyValue(this); + return kv.toString(); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java new file mode 100644 index 0000000..1623876 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.decode.column; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.util.vint.UFIntTool; +import org.apache.hbase.util.vint.UVIntTool; + +@InterfaceAudience.Private +public class ColumnNodeReader { + + /**************** fields ************************/ + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + protected byte[] columnBuffer; + protected boolean familyVsQualifier; + + protected int offsetIntoBlock; + + protected int tokenOffsetIntoBlock; + protected int tokenLength; + protected int parentStartPosition; + + + /************** construct *************************/ + + public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) { + this.columnBuffer = columnBuffer; + this.familyVsQualifier = familyVsQualifier; + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.blockMeta = blockMeta; + this.block = block; + } + + + /************* methods *****************************/ + + public void positionAt(int offsetIntoBlock) { + this.offsetIntoBlock = offsetIntoBlock; + tokenLength = UVIntTool.getInt(block, offsetIntoBlock); + tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength); + int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength; + int offsetWidth; + if (familyVsQualifier) { + offsetWidth = blockMeta.getFamilyOffsetWidth(); + } else { + offsetWidth = blockMeta.getQualifierOffsetWidth(); + } + parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth); + } + + public void prependTokenToBuffer(int bufferStartIndex) { + System.arraycopy(block, tokenOffsetIntoBlock, columnBuffer, bufferStartIndex, tokenLength); + } + + public boolean isRoot() { + if (familyVsQualifier) { + return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset(); + } else { + return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset(); + } + } + + + /************** standard methods *********************/ + + @Override + public String toString() { + return super.toString() + "[" + offsetIntoBlock + "]"; + } + + + /****************** get/set ****************************/ + + public int getTokenLength() { + return tokenLength; + } + + public int getParentStartPosition() { + return parentStartPosition; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java new file mode 100644 index 0000000..304a4b3 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/column/ColumnReader.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode.column; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; + +/** + * Position one of these appropriately in the data block and you can call its methods to retrieve + * the family or qualifier at the current position. + */ +@InterfaceAudience.Private +public class ColumnReader { + + /****************** fields *************************/ + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + protected byte[] columnBuffer; + protected int columnOffset; + protected int columnLength; + protected boolean familyVsQualifier; + + protected ColumnNodeReader columnNodeReader; + + + /******************** construct *******************/ + + public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) { + this.columnBuffer = columnBuffer; + this.familyVsQualifier = familyVsQualifier; + this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier); + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.blockMeta = blockMeta; + this.block = block; + clearColumnBuffer(); + columnNodeReader.initOnBlock(blockMeta, block); + } + + + /********************* methods *******************/ + + public ColumnReader populateBuffer(int offsetIntoColumnData) { + clearColumnBuffer(); + int nextRelativeOffset = offsetIntoColumnData; + while (true) { + int absoluteOffset; + if (familyVsQualifier) { + absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset; + } else { + absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset; + } + columnNodeReader.positionAt(absoluteOffset); + columnOffset -= columnNodeReader.getTokenLength(); + columnLength += columnNodeReader.getTokenLength(); + columnNodeReader.prependTokenToBuffer(columnOffset); + if (columnNodeReader.isRoot()) { + return this; + } + nextRelativeOffset = columnNodeReader.getParentStartPosition(); + } + } + + public byte[] copyBufferToNewArray() {// for testing + byte[] out = new byte[columnLength]; + System.arraycopy(columnBuffer, columnOffset, out, 0, out.length); + return out; + } + + public int getColumnLength() { + return columnLength; + } + + public void clearColumnBuffer() { + columnOffset = columnBuffer.length; + columnLength = 0; + } + + + /****************************** get/set *************************************/ + + public int getColumnOffset() { + return columnOffset; + } + +} + diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java new file mode 100644 index 0000000..3eb8464 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/row/RowNodeReader.java @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode.row; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.util.vint.UFIntTool; +import org.apache.hbase.util.vint.UVIntTool; + +/** + * Position one of these appropriately in the data block and you can call its methods to retrieve + * information necessary to decode the cells in the row. + */ +@InterfaceAudience.Private +public class RowNodeReader { + + /************* fields ***********************************/ + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + protected int offset; + protected int fanIndex; + + protected int numCells; + + protected int tokenOffset; + protected int tokenLength; + protected int fanOffset; + protected int fanOut; + + protected int familyOffsetsOffset; + protected int qualifierOffsetsOffset; + protected int timestampIndexesOffset; + protected int mvccVersionIndexesOffset; + protected int operationTypesOffset; + protected int valueOffsetsOffset; + protected int valueLengthsOffset; + protected int nextNodeOffsetsOffset; + + + /******************* construct **************************/ + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) { + this.blockMeta = blockMeta; + this.block = block; + + this.offset = offset; + resetFanIndex(); + + this.tokenLength = UVIntTool.getInt(block, offset); + this.tokenOffset = offset + UVIntTool.numBytes(tokenLength); + + this.fanOut = UVIntTool.getInt(block, tokenOffset + tokenLength); + this.fanOffset = tokenOffset + tokenLength + UVIntTool.numBytes(fanOut); + + this.numCells = UVIntTool.getInt(block, fanOffset + fanOut); + + this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells); + this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth(); + this.timestampIndexesOffset = qualifierOffsetsOffset + numCells + * blockMeta.getQualifierOffsetWidth(); + this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells + * blockMeta.getTimestampIndexWidth(); + this.operationTypesOffset = mvccVersionIndexesOffset + numCells + * blockMeta.getMvccVersionIndexWidth(); + this.valueOffsetsOffset = operationTypesOffset + numCells * blockMeta.getKeyValueTypeWidth(); + this.valueLengthsOffset = valueOffsetsOffset + numCells * blockMeta.getValueOffsetWidth(); + this.nextNodeOffsetsOffset = valueLengthsOffset + numCells * blockMeta.getValueLengthWidth(); + } + + + /******************** methods ****************************/ + + public boolean isLeaf() { + return fanOut == 0; + } + + public boolean isNub() { + return fanOut > 0 && numCells > 0; + } + + public boolean isBranch() { + return fanOut > 0 && numCells == 0; + } + + public boolean hasOccurrences() { + return numCells > 0; + } + + public int getTokenArrayOffset(){ + return tokenOffset; + } + + public int getTokenLength() { + return tokenLength; + } + + public byte getFanByte(int i) { + return block[fanOffset + i]; + } + + 
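// Illustrative layout note (added for review, not part of this patch): initOnBlock above decodes
// one row node laid out roughly as below, where each per-cell section is a run of fixed-width
// unsigned ints whose widths come from PrefixTreeBlockMeta (a width of 0 means the section is
// omitted, e.g. when all timestamps in the block are identical):
//
//   vint tokenLength | token bytes | vint fanOut | fan bytes | vint numCells
//   | familyOffsets[numCells] | qualifierOffsets[numCells] | timestampIndexes[numCells]
//   | mvccVersionIndexes[numCells] | types[numCells] | valueOffsets[numCells]
//   | valueLengths[numCells] | nextNodeOffsets (one per fan position)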
/** + * for debugging + */ + protected String getFanByteReadable(int i){ + return Bytes.toStringBinary(block, fanOffset + i, 1); + } + + public int getFamilyOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getFamilyOffsetWidth(); + int startIndex = familyOffsetsOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getColumnOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getQualifierOffsetWidth(); + int startIndex = qualifierOffsetsOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getTimestampIndexWidth(); + int startIndex = timestampIndexesOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getMvccVersionIndex(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getMvccVersionIndexWidth(); + int startIndex = mvccVersionIndexesOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public int getType(int index, PrefixTreeBlockMeta blockMeta) { + if (blockMeta.isAllSameType()) { + return blockMeta.getAllTypes(); + } + return block[operationTypesOffset + index]; + } + + public int getValueOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getValueOffsetWidth(); + int startIndex = valueOffsetsOffset + fIntWidth * index; + int offset = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + return offset; + } + + public int getValueLength(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getValueLengthWidth(); + int startIndex = valueLengthsOffset + fIntWidth * index; + int length = (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + return length; + } + + public int getNextNodeOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getNextNodeOffsetWidth(); + int startIndex = nextNodeOffsetsOffset + fIntWidth * index; + return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + + public String getBranchNubLeafIndicator() { + if (isNub()) { + return "N"; + } + return isBranch() ? "B" : "L"; + } + + public boolean hasChildren() { + return fanOut > 0; + } + + public int getLastFanIndex() { + return fanOut - 1; + } + + public int getLastCellIndex() { + return numCells - 1; + } + + public int getNumCells() { + return numCells; + } + + public int getFanOut() { + return fanOut; + } + + public byte[] getToken() { + // TODO pass in reusable ByteRange + return new ByteRange(block, tokenOffset, tokenLength).deepCopyToNewArray(); + } + + public int getOffset() { + return offset; + } + + public int whichFanNode(byte searchForByte) { + if( ! 
hasFan()){ + throw new IllegalStateException("This row node has no fan, so can't search it"); + } + int fanIndexInBlock = Bytes.unsignedBinarySearch(block, fanOffset, fanOffset + fanOut, + searchForByte); + if (fanIndexInBlock >= 0) {// found it, but need to adjust for position of fan in overall block + return fanIndexInBlock - fanOffset; + } + return fanIndexInBlock + fanOffset + 1;// didn't find it, so compensate in reverse + } + + public void resetFanIndex() { + fanIndex = -1;// just the way the logic currently works + } + + public int getFanIndex() { + return fanIndex; + } + + public void setFanIndex(int fanIndex) { + this.fanIndex = fanIndex; + } + + public boolean hasFan(){ + return fanOut > 0; + } + + public boolean hasPreviousFanNodes() { + return fanOut > 0 && fanIndex > 0; + } + + public boolean hasMoreFanNodes() { + return fanIndex < getLastFanIndex(); + } + + public boolean isOnLastFanNode() { + return !hasMoreFanNodes(); + } + + + /*************** standard methods **************************/ + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("fan:" + Bytes.toStringBinary(block, fanOffset, fanOut)); + sb.append(",token:" + Bytes.toStringBinary(block, tokenOffset, tokenLength)); + sb.append(",numCells:" + numCells); + sb.append(",fanIndex:"+fanIndex); + if(fanIndex>=0){ + sb.append("("+getFanByteReadable(fanIndex)+")"); + } + return sb.toString(); + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java new file mode 100644 index 0000000..5a88fdf --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/MvccVersionDecoder.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode.timestamp; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.util.vint.UFIntTool; + +/** + * Given a block and its blockMeta, this will decode the MvccVersion for the i-th Cell in the block. 
+ */ +@InterfaceAudience.Private +public class MvccVersionDecoder { + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + + /************** construct ***********************/ + + public MvccVersionDecoder() { + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.block = block; + this.blockMeta = blockMeta; + } + + + /************** methods *************************/ + + public long getMvccVersion(int index) { + if (blockMeta.getMvccVersionIndexWidth() == 0) {//all mvccVersions in the block were identical + return blockMeta.getMinMvccVersion(); + } + int startIndex = blockMeta.getAbsoluteMvccVersionOffset() + + blockMeta.getMvccVersionDeltaWidth() * index; + long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getMvccVersionDeltaWidth()); + return blockMeta.getMinMvccVersion() + delta; + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java new file mode 100644 index 0000000..b3e122a --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/decode/timestamp/TimestampDecoder.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.decode.timestamp; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.util.vint.UFIntTool; + +/** + * Given a block and its blockMeta, this will decode the timestamp for the i-th Cell in the block. 
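(Editor's note, not part of the patch: a rough usage sketch for the decoder below; blockMeta and block are assumed to come from whatever searcher code owns the decoded block, and imports are elided.)

    // Sketch only: the two-call lifecycle of the decoder added in this file.
    TimestampDecoder timestampDecoder = new TimestampDecoder();
    timestampDecoder.initOnBlock(blockMeta, block);   // assumed: provided by the caller
    long timestampOfThirdCell = timestampDecoder.getLong(2);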
+ */ +@InterfaceAudience.Private +public class TimestampDecoder { + + protected PrefixTreeBlockMeta blockMeta; + protected byte[] block; + + + /************** construct ***********************/ + + public TimestampDecoder() { + } + + public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { + this.block = block; + this.blockMeta = blockMeta; + } + + + /************** methods *************************/ + + public long getLong(int index) { + if (blockMeta.getTimestampIndexWidth() == 0) {//all timestamps in the block were identical + return blockMeta.getMinTimestamp(); + } + int startIndex = blockMeta.getAbsoluteTimestampOffset() + blockMeta.getTimestampDeltaWidth() + * index; + long delta = UFIntTool.fromBytes(block, startIndex, blockMeta.getTimestampDeltaWidth()); + return blockMeta.getMinTimestamp() + delta; + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java new file mode 100644 index 0000000..84cd4e8 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderFactory.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode; + +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Retrieve PrefixTreeEncoders from this factory which handles pooling them and preparing the + * ones retrieved from the pool for usage. 
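(Editor's note, not part of the patch: a sketch of the check-out/check-in pattern this factory is meant to support; the OutputStream is assumed to be owned by the caller.)

    // Sketch only: always pair checkOut with checkIn so the pooled encoder is reused.
    PrefixTreeEncoder encoder = EncoderFactory.checkOut(outputStream, true /* includeMvccVersion */);
    try {
      // ... encoder.write(cell) for each sorted cell, then encoder.flush() ...
    } finally {
      EncoderFactory.checkIn(encoder);   // hands the (large, reusable) encoder back to the pool
    }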
+ */ +@InterfaceAudience.Private +public class EncoderFactory { + + protected static EncoderPool POOL = new ThreadLocalEncoderPool(); + + + public static PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion) { + return POOL.checkOut(outputStream, includeMvccVersion); + } + + public static void checkIn(PrefixTreeEncoder encoder) { + POOL.checkIn(encoder); + } + + + /**************************** helper ******************************/ + + protected static PrefixTreeEncoder prepareEncoder(PrefixTreeEncoder encoder, + OutputStream outputStream, boolean includeMvccVersion) { + PrefixTreeEncoder ret = encoder; + if (encoder == null) { + ret = new PrefixTreeEncoder(outputStream, includeMvccVersion); + } + ret.reset(outputStream, includeMvccVersion); + return ret; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java new file mode 100644 index 0000000..ca73f91 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/EncoderPool.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode; + +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + + +@InterfaceAudience.Private +public interface EncoderPool { + + PrefixTreeEncoder checkOut(OutputStream outputStream, boolean includeMvccVersion); + void checkIn(PrefixTreeEncoder encoder); + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java new file mode 100644 index 0000000..0dc9ff4 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.encode; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.KeyValueTool; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellOutputStream; +import org.apache.hbase.cell.CellTool; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; +import org.apache.hbase.codec.prefixtree.encode.other.CellTypeEncoder; +import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder; +import org.apache.hbase.codec.prefixtree.encode.row.RowSectionWriter; +import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hbase.util.byterange.ByteRangeSet; +import org.apache.hbase.util.byterange.impl.ByteRangeHashSet; +import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; +import org.apache.hbase.util.vint.UFIntTool; + +/** + * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are + * added they are completely copied into the various encoding structures. This is important because + * usually the cells being fed in during compactions will be transient.
+ *
+ * Usage:
+ * 1) constructor
+ * 2) append cells in sorted order: write(Cell cell)
+ * 3) flush() (see the usage sketch below)
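(Editor's note, not part of the patch: the three steps above as a minimal sketch; the sorted Cell source is assumed to be handled by the caller and imports are elided.)

    // Sketch only: construct once, feed cells in sorted order, then flush the encoded block.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    PrefixTreeEncoder encoder = new PrefixTreeEncoder(out, false /* includeMvccVersion */);
    for (Cell cell : cellsInSortedOrder) {   // assumed: already in KeyValue sort order
      encoder.write(cell);
    }
    encoder.flush();                         // blockMeta, row/family/qualifier sections, values
    byte[] encodedBlock = out.toByteArray();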
+ */ +@InterfaceAudience.Private +public class PrefixTreeEncoder implements CellOutputStream { + + /**************** static ************************/ + + protected static final Log LOG = LogFactory.getLog(PrefixTreeEncoder.class); + + //future-proof where HBase supports multiple families in a data block. + public static final boolean MULITPLE_FAMILIES_POSSIBLE = true; + + private static final boolean USE_HASH_COLUMN_SORTER = true; + private static final int INITIAL_PER_CELL_ARRAY_SIZES = 256; + private static final int VALUE_BUFFER_INIT_SIZE = 64 * 1024; + + + /**************** fields *************************/ + + protected long numResets = 0L; + + protected OutputStream outputStream; + + /* + * Cannot change during a single block's encoding. If false, then substitute incoming Cell's + * mvccVersion with zero and write out the block as usual. + */ + protected boolean includeMvccVersion; + + /* + * reusable ByteRanges used for communicating with the sorters/compilers + */ + protected ByteRange rowRange; + protected ByteRange familyRange; + protected ByteRange qualifierRange; + + /* + * incoming Cell fields are copied into these arrays + */ + protected long[] timestamps; + protected long[] mvccVersions; + protected byte[] typeBytes; + protected int[] valueOffsets; + protected byte[] values; + + protected PrefixTreeBlockMeta blockMeta; + + /* + * Sub-encoders for the simple long/byte fields of a Cell. Add to these as each cell arrives and + * compile before flushing. + */ + protected LongEncoder timestampEncoder; + protected LongEncoder mvccVersionEncoder; + protected CellTypeEncoder cellTypeEncoder; + + /* + * Structures used for collecting families and qualifiers, de-duplicating them, and sorting them + * so they can be passed to the tokenizers. Unlike row keys where we can detect duplicates by + * comparing only with the previous row key, families and qualifiers can arrive in unsorted order + * in blocks spanning multiple rows. We must collect them all into a set to de-duplicate them. + */ + protected ByteRangeSet familyDeduplicator; + protected ByteRangeSet qualifierDeduplicator; + + /* + * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory + * trie structure with nodes connected by memory pointers (not serializable yet). + */ + protected Tokenizer rowTokenizer; + protected Tokenizer familyTokenizer; + protected Tokenizer qualifierTokenizer; + + /* + * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write + * all information to an output stream of bytes that can be stored on disk. + */ + protected RowSectionWriter rowWriter; + protected ColumnSectionWriter familyWriter; + protected ColumnSectionWriter qualifierWriter; + + /* + * Integers used for counting cells and bytes. We keep track of the size of the Cells as if they + * were full KeyValues because some parts of HBase like to know the "unencoded size". 
+ */ + protected int totalCells = 0; + protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues + protected int totalValueBytes = 0; + protected int maxValueLength = 0; + protected int totalBytes = 0;// + + + /***************** construct ***********************/ + + public PrefixTreeEncoder(OutputStream outputStream, boolean includeMvccVersion) { + // used during cell accumulation + this.blockMeta = new PrefixTreeBlockMeta(); + this.rowRange = new ByteRange(); + this.familyRange = new ByteRange(); + this.qualifierRange = new ByteRange(); + this.timestamps = new long[INITIAL_PER_CELL_ARRAY_SIZES]; + this.mvccVersions = new long[INITIAL_PER_CELL_ARRAY_SIZES]; + this.typeBytes = new byte[INITIAL_PER_CELL_ARRAY_SIZES]; + this.valueOffsets = new int[INITIAL_PER_CELL_ARRAY_SIZES]; + this.values = new byte[VALUE_BUFFER_INIT_SIZE]; + + // used during compilation + this.familyDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() + : new ByteRangeTreeSet(); + this.qualifierDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() + : new ByteRangeTreeSet(); + this.timestampEncoder = new LongEncoder(blockMeta); + this.mvccVersionEncoder = new LongEncoder(blockMeta); + this.cellTypeEncoder = new CellTypeEncoder(); + this.rowTokenizer = new Tokenizer(); + this.familyTokenizer = new Tokenizer(); + this.qualifierTokenizer = new Tokenizer(); + this.rowWriter = new RowSectionWriter(); + this.familyWriter = new ColumnSectionWriter(); + this.qualifierWriter = new ColumnSectionWriter(); + + reset(outputStream, includeMvccVersion); + } + + public void reset(OutputStream outputStream, boolean includeMvccVersion) { + ++numResets; + this.includeMvccVersion = includeMvccVersion; + this.outputStream = outputStream; + valueOffsets[0] = 0; + + familyDeduplicator.reset(); + qualifierDeduplicator.reset(); + rowTokenizer.reset(); + timestampEncoder.reset(); + mvccVersionEncoder.reset(); + cellTypeEncoder.reset(); + familyTokenizer.reset(); + qualifierTokenizer.reset(); + rowWriter.reset(); + familyWriter.reset(); + qualifierWriter.reset(); + + totalCells = 0; + totalUnencodedBytes = 0; + totalValueBytes = 0; + maxValueLength = 0; + totalBytes = 0; + } + + /** + * Check that the arrays used to hold cell fragments are large enough for the cell that is being + * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the + * first few block encodings but should stabilize quickly. + */ + protected void ensurePerCellCapacities() { + int currentCapacity = valueOffsets.length; + int neededCapacity = totalCells + 2;// some things write one index ahead. +2 to be safe + if (neededCapacity < currentCapacity) { + return; + } + + int padding = neededCapacity;//this will double the array size + timestamps = ArrayUtils.growIfNecessary(timestamps, neededCapacity, padding); + mvccVersions = ArrayUtils.growIfNecessary(mvccVersions, neededCapacity, padding); + typeBytes = ArrayUtils.growIfNecessary(typeBytes, neededCapacity, padding); + valueOffsets = ArrayUtils.growIfNecessary(valueOffsets, neededCapacity, padding); + } + + /******************** CellOutputStream methods *************************/ + + /** + * Note: Unused until support is added to the scanner/heap + *

+ The following methods are optimized versions of write(Cell cell). The result should be + identical; however, the implementation may be able to execute them much more efficiently because + it does not need to compare the unchanged fields with the previous cell's. + *

+ * Consider the benefits during compaction when paired with a CellScanner that is also aware of + * row boundaries. The CellScanner can easily use these methods instead of blindly passing Cells + * to the write(Cell cell) method. + *
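(Editor's note, not part of the patch: what such a row-aware caller might look like, reusing the encoder variable from the sketch further above; rowChanged() is a hypothetical helper standing in for whatever row-boundary signal the CellScanner exposes.)

    // Sketch only: first cell of each row goes through write(), the rest through writeWithRepeatRow().
    Cell previous = null;
    for (Cell cell : cellsInSortedOrder) {
      if (previous == null || rowChanged(previous, cell)) {   // rowChanged() is hypothetical
        encoder.write(cell);                  // tokenizes the new row key
      } else {
        encoder.writeWithRepeatRow(cell);     // skips the row comparison, bumps the row's cell count
      }
      previous = cell;
    }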

+ * The savings of skipping duplicate row detection are significant with long row keys. A + * DataBlockEncoder may store a row key once in combination with a count of how many cells are in + * the row. With a 100 byte row key, we can replace 100 byte comparisons with a single increment + * of the counter, and that is for every cell in the row. + */ + + /** + * Add a Cell to the output stream but repeat the previous row. + */ + //@Override + public void writeWithRepeatRow(Cell cell) { + ensurePerCellCapacities();//can we optimize away some of this? + + //save a relatively expensive row comparison, incrementing the row's counter instead + rowTokenizer.incrementNumOccurrencesOfLatestValue(); + addFamilyPart(cell); + addQualifierPart(cell); + addAfterRowFamilyQualifier(cell); + } + + + @Override + public void write(Cell cell) { + ensurePerCellCapacities(); + + rowTokenizer.addSorted(CellTool.fillRowRange(cell, rowRange)); + addFamilyPart(cell); + addQualifierPart(cell); + addAfterRowFamilyQualifier(cell); + } + + + /***************** internal add methods ************************/ + + private void addAfterRowFamilyQualifier(Cell cell){ + // timestamps + timestamps[totalCells] = cell.getTimestamp(); + timestampEncoder.add(cell.getTimestamp()); + + // memstore timestamps + if (includeMvccVersion) { + mvccVersions[totalCells] = cell.getMvccVersion(); + mvccVersionEncoder.add(cell.getMvccVersion()); + totalUnencodedBytes += WritableUtils.getVIntSize(cell.getMvccVersion()); + }else{ + //must overwrite in case there was a previous version in this array slot + mvccVersions[totalCells] = 0L; + if(totalCells == 0){//only need to do this for the first cell added + mvccVersionEncoder.add(0L); + } + //totalUncompressedBytes += 0;//mvccVersion takes zero bytes when disabled + } + + // types + typeBytes[totalCells] = cell.getTypeByte(); + cellTypeEncoder.add(cell.getTypeByte()); + + // values + totalValueBytes += cell.getValueLength(); + // double the array each time we run out of space + values = ArrayUtils.growIfNecessary(values, totalValueBytes, 2 * totalValueBytes); + CellTool.copyValueTo(cell, values, valueOffsets[totalCells]); + if (cell.getValueLength() > maxValueLength) { + maxValueLength = cell.getValueLength(); + } + valueOffsets[totalCells + 1] = totalValueBytes; + + // general + totalUnencodedBytes += KeyValueTool.length(cell); + ++totalCells; + } + + private void addFamilyPart(Cell cell) { + if (MULITPLE_FAMILIES_POSSIBLE || totalCells == 0) { + CellTool.fillFamilyRange(cell, familyRange); + familyDeduplicator.add(familyRange); + } + } + + private void addQualifierPart(Cell cell) { + CellTool.fillQualifierRange(cell, qualifierRange); + qualifierDeduplicator.add(qualifierRange); + } + + + /****************** compiling/flushing ********************/ + + /** + * Expensive method. The second half of the encoding work happens here. + * + * Take all the separate accumulated data structures and turn them into a single stream of bytes + * which is written to the outputStream. + */ + @Override + public void flush() throws IOException { + compile(); + + // do the actual flushing to the output stream. Order matters. + blockMeta.writeVariableBytesToOutputStream(outputStream); + rowWriter.writeBytes(outputStream); + familyWriter.writeBytes(outputStream); + qualifierWriter.writeBytes(outputStream); + timestampEncoder.writeBytes(outputStream); + mvccVersionEncoder.writeBytes(outputStream); + //CellType bytes are in the row nodes. 
there is no additional type section + outputStream.write(values, 0, totalValueBytes); + } + + /** + * Now that all the cells have been added, do the work to reduce them to a series of byte[] + * fragments that are ready to be written to the output stream. + */ + protected void compile(){ + blockMeta.setNumKeyValueBytes(totalUnencodedBytes); + int lastValueOffset = valueOffsets[totalCells]; + blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset)); + blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength)); + blockMeta.setNumValueBytes(totalValueBytes); + totalBytes += totalValueBytes; + + //these compile methods will add to totalBytes + compileTypes(); + compileMvccVersions(); + compileTimestamps(); + compileQualifiers(); + compileFamilies(); + compileRows(); + + int numMetaBytes = blockMeta.calculateNumMetaBytes(); + blockMeta.setNumMetaBytes(numMetaBytes); + totalBytes += numMetaBytes; + } + + /** + * The following "compile" methods do any intermediate work necessary to transform the cell + * fragments collected during the writing phase into structures that are ready to write to the + * outputStream. + *

+ * The family and qualifier treatment is almost identical, as is timestamp and mvccVersion. + */ + + protected void compileTypes() { + blockMeta.setAllSameType(cellTypeEncoder.areAllSameType()); + if(cellTypeEncoder.areAllSameType()){ + blockMeta.setAllTypes(cellTypeEncoder.getOnlyType()); + } + } + + protected void compileMvccVersions() { + mvccVersionEncoder.compile(); + blockMeta.setMvccVersionFields(mvccVersionEncoder); + int numMvccVersionBytes = mvccVersionEncoder.getOutputArrayLength(); + totalBytes += numMvccVersionBytes; + } + + protected void compileTimestamps() { + timestampEncoder.compile(); + blockMeta.setTimestampFields(timestampEncoder); + int numTimestampBytes = timestampEncoder.getOutputArrayLength(); + totalBytes += numTimestampBytes; + } + + protected void compileQualifiers() { + blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size()); + qualifierDeduplicator.compile(); + qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges()); + qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false); + qualifierWriter.compile(); + int numQualifierBytes = qualifierWriter.getNumBytes(); + blockMeta.setNumQualifierBytes(numQualifierBytes); + totalBytes += numQualifierBytes; + } + + protected void compileFamilies() { + blockMeta.setNumUniqueFamilies(familyDeduplicator.size()); + familyDeduplicator.compile(); + familyTokenizer.addAll(familyDeduplicator.getSortedRanges()); + familyWriter.reconstruct(blockMeta, familyTokenizer, true); + familyWriter.compile(); + int numFamilyBytes = familyWriter.getNumBytes(); + blockMeta.setNumFamilyBytes(numFamilyBytes); + totalBytes += numFamilyBytes; + } + + protected void compileRows() { + rowWriter.reconstruct(this); + rowWriter.compile(); + int numRowBytes = rowWriter.getNumBytes(); + blockMeta.setNumRowBytes(numRowBytes); + blockMeta.setRowTreeDepth(rowTokenizer.getTreeDepth()); + totalBytes += numRowBytes; + } + + /********************* convenience getters ********************************/ + + public long getValueOffset(int index) { + return valueOffsets[index]; + } + + public int getValueLength(int index) { + return (int) (valueOffsets[index + 1] - valueOffsets[index]); + } + + /************************* get/set *************************************/ + + public PrefixTreeBlockMeta getBlockMeta() { + return blockMeta; + } + + public Tokenizer getRowTokenizer() { + return rowTokenizer; + } + + public LongEncoder getTimestampEncoder() { + return timestampEncoder; + } + + public int getTotalBytes() { + return totalBytes; + } + + public long[] getTimestamps() { + return timestamps; + } + + public long[] getMvccVersions() { + return mvccVersions; + } + + public byte[] getTypeBytes() { + return typeBytes; + } + + public LongEncoder getMvccVersionEncoder() { + return mvccVersionEncoder; + } + + public ByteRangeSet getFamilySorter() { + return familyDeduplicator; + } + + public ByteRangeSet getQualifierSorter() { + return qualifierDeduplicator; + } + + public ColumnSectionWriter getFamilyWriter() { + return familyWriter; + } + + public ColumnSectionWriter getQualifierWriter() { + return qualifierWriter; + } + + public RowSectionWriter getRowWriter() { + return rowWriter; + } + + public ByteRange getValueByteRange() { + return new ByteRange(values, 0, totalValueBytes); + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java new file mode 100644 index 
0000000..c60a8f5 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/ThreadLocalEncoderPool.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode; + +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * Pool to enable reusing the Encoder objects which can consist of thousands of smaller objects and + * would be more garbage than the data in the block. + */ +@InterfaceAudience.Private +public class ThreadLocalEncoderPool implements EncoderPool{ + + protected static ThreadLocal BUILDER_HOLDER + = new ThreadLocal(); + + @Override + public PrefixTreeEncoder checkOut(OutputStream os, boolean includeMvccVersion) { + PrefixTreeEncoder builder = BUILDER_HOLDER.get(); + builder = EncoderFactory.prepareEncoder(builder, os, includeMvccVersion); + BUILDER_HOLDER.set(builder); + return builder; + } + + @Override + public void checkIn(PrefixTreeEncoder encoder) { + // attached to thread on checkOut, so shouldn't need to do anything here + + // do we need to worry about detaching encoders from compaction threads or are the same threads + // used over and over + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java new file mode 100644 index 0000000..b84e15a --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.encode.column; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Strings; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.util.vint.UFIntTool; +import org.apache.hbase.util.vint.UVIntTool; + +/** + * Column nodes can be either family nodes or qualifier nodes, as both sections encode similarly. + * The family and qualifier sections of the data block are made of 1 or more of these nodes. + *

+ * Each node is composed of 3 sections:
+ *

  • tokenLength: UVInt (normally 1 byte) indicating the number of token bytes + *
  • token[]: the actual token bytes + *
  • parentStartPosition: the offset of the next node from the start of the family or qualifier + * section + */ +@InterfaceAudience.Private +public class ColumnNodeWriter{ + + /************* fields ****************************/ + + protected TokenizerNode builderNode; + protected PrefixTreeBlockMeta blockMeta; + + protected boolean familyVsQualifier; + + protected int tokenLength; + protected byte[] token; + protected int parentStartPosition; + + + /*************** construct **************************/ + + public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode, + boolean familyVsQualifier) { + this.blockMeta = blockMeta; + this.builderNode = builderNode; + this.familyVsQualifier = familyVsQualifier; + calculateTokenLength(); + } + + + /************* methods *******************************/ + + public boolean isRoot() { + return parentStartPosition == 0; + } + + private void calculateTokenLength() { + tokenLength = builderNode.getTokenLength(); + token = new byte[tokenLength]; + } + + /** + * This method is called before blockMeta.qualifierOffsetWidth is known, so we pass in a + * placeholder. + * @param offsetWidthPlaceholder the placeholder + * @return node width + */ + public int getWidthUsingPlaceholderForOffsetWidth(int offsetWidthPlaceholder) { + int width = 0; + width += UVIntTool.numBytes(tokenLength); + width += token.length; + width += offsetWidthPlaceholder; + return width; + } + + public void writeBytes(OutputStream os) throws IOException { + int parentOffsetWidth; + if (familyVsQualifier) { + parentOffsetWidth = blockMeta.getFamilyOffsetWidth(); + } else { + parentOffsetWidth = blockMeta.getQualifierOffsetWidth(); + } + UVIntTool.writeBytes(tokenLength, os); + os.write(token); + UFIntTool.writeBytes(parentOffsetWidth, parentStartPosition, os); + } + + public void setTokenBytes(ByteRange source) { + source.deepCopySubRangeTo(0, tokenLength, token, 0); + } + + + /****************** standard methods ************************/ + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(Strings.padFront(builderNode.getOutputArrayOffset() + "", ' ', 3) + ","); + sb.append("["); + sb.append(Bytes.toString(token)); + sb.append("]->"); + sb.append(parentStartPosition); + return sb.toString(); + } + + + /************************** get/set ***********************/ + + public void setParentStartPosition(int parentStartPosition) { + this.parentStartPosition = parentStartPosition; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java new file mode 100644 index 0000000..1dd6d45 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.column; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.util.vint.UFIntTool; + +import com.google.common.collect.Lists; + +/** + * Takes the tokenized family or qualifier data and flattens it into a stream of bytes. The family + * section is written after the row section, and qualifier section after family section. + *

    + * The family and qualifier tries, or "column tries", are structured differently than the row trie. + * The trie cannot be reassembled without external data about the offsets of the leaf nodes, and + * these external pointers are stored in the nubs and leaves of the row trie. For each cell in a + * row, the row trie contains a list of offsets into the column sections (along with pointers to + * timestamps and other per-cell fields). These offsets point to the last column node/token that + * comprises the column name. To assemble the column name, the trie is traversed in reverse (right + * to left), with the rightmost tokens pointing to the start of their "parent" node which is the + * node to the left. + *
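(Editor's note, not part of the patch: the right-to-left reassembly described above, sketched with plain in-memory objects instead of the real UVInt/UFInt on-disk layout; Node and its fields are illustrative stand-ins.)

    // Sketch only: rebuild a column name by walking parent links from the referenced
    // (rightmost) node back to the root, prepending each token along the way.
    static final class Node {
      final byte[] token;
      final Node parent;   // null for the root, mirroring a parent offset of 0
      Node(byte[] token, Node parent) { this.token = token; this.parent = parent; }
    }

    static byte[] reassembleColumn(Node referenced) {
      java.util.Deque<byte[]> parts = new java.util.ArrayDeque<byte[]>();
      for (Node n = referenced; n != null; n = n.parent) {
        parts.addFirst(n.token);   // traverse right to left, emit left to right
      }
      java.io.ByteArrayOutputStream column = new java.io.ByteArrayOutputStream();
      for (byte[] part : parts) {
        column.write(part, 0, part.length);
      }
      return column.toByteArray();
    }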

    + * This choice was made to reduce the size of the column trie by storing the minimum amount of + * offset data. As a result, to find a specific qualifier within a row, you must do a binary search + * of the column nodes, reassembling each one as you search. Future versions of the PrefixTree might + * encode the columns in both a forward and reverse trie, which would convert binary searches into + * more efficient trie searches which would be beneficial for wide rows. + */ +@InterfaceAudience.Private +public class ColumnSectionWriter { + + public static final int EXPECTED_NUBS_PLUS_LEAVES = 100; + + /****************** fields ****************************/ + + protected PrefixTreeBlockMeta blockMeta; + + protected boolean familyVsQualifier; + protected Tokenizer tokenizer; + protected int numBytes = 0; + protected int numAppended = 0;// for tests + protected ArrayList nonLeaves; + protected ArrayList leaves; + protected int numNonLeaves = 0; + protected int numLeaves = 0; + protected ArrayList allNodes; + protected ArrayList columnNodeWriters; + protected List outputArrayOffsets; + + + /*********************** construct *********************/ + + public ColumnSectionWriter() { + this.nonLeaves = Lists.newArrayList(); + this.leaves = Lists.newArrayList(); + this.outputArrayOffsets = Lists.newArrayList(); + } + + public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder, + boolean familyVsQualifier) { + this();// init collections + reconstruct(blockMeta, builder, familyVsQualifier); + } + + public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder, + boolean familyVsQualifier) { + this.blockMeta = blockMeta; + this.tokenizer = builder; + this.familyVsQualifier = familyVsQualifier; + } + + public void reset() { + numBytes = 0; + numAppended = 0; + nonLeaves.clear(); + leaves.clear(); + outputArrayOffsets.clear(); + numNonLeaves = 0; + numLeaves = 0; + } + + + /****************** methods *******************************/ + + public ColumnSectionWriter compile() { + if (familyVsQualifier) { + // do nothing. 
max family length fixed at Byte.MAX_VALUE + } else { + blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength()); + } + + tokenizer.setNodeFirstInsertionIndexes(); + + tokenizer.appendNodes(nonLeaves, true, false); + numNonLeaves = nonLeaves.size(); + + tokenizer.appendNodes(leaves, false, true); + numLeaves = leaves.size(); + + allNodes = Lists.newArrayListWithCapacity(nonLeaves.size() + leaves.size()); + allNodes.addAll(nonLeaves); + allNodes.addAll(leaves); + + columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes)); + for (int i = 0; i < allNodes.size(); ++i) { + TokenizerNode node = allNodes.get(i); + columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier)); + } + + // leaf widths are known at this point, so add them up + int totalBytesWithoutOffsets = 0; + for (int i = allNodes.size() - 1; i >= 0; --i) { + ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i); + // leaves store all but their first token byte + totalBytesWithoutOffsets += columnNodeWriter.getWidthUsingPlaceholderForOffsetWidth(0); + } + + // figure out how wide our offset FInts are + int parentOffsetWidth = 0; + while (true) { + ++parentOffsetWidth; + int numBytesFinder = totalBytesWithoutOffsets + parentOffsetWidth * allNodes.size(); + if (numBytesFinder < UFIntTool.maxValueForNumBytes(parentOffsetWidth)) { + numBytes = numBytesFinder; + break; + }// it fits + } + if (familyVsQualifier) { + blockMeta.setFamilyOffsetWidth(parentOffsetWidth); + } else { + blockMeta.setQualifierOffsetWidth(parentOffsetWidth); + } + + int forwardIndex = 0; + for (int i = 0; i < allNodes.size(); ++i) { + TokenizerNode node = allNodes.get(i); + ColumnNodeWriter columnNodeWriter = columnNodeWriters.get(i); + int fullNodeWidth = columnNodeWriter + .getWidthUsingPlaceholderForOffsetWidth(parentOffsetWidth); + node.setOutputArrayOffset(forwardIndex); + columnNodeWriter.setTokenBytes(node.getToken()); + if (node.isRoot()) { + columnNodeWriter.setParentStartPosition(0); + } else { + columnNodeWriter.setParentStartPosition(node.getParent().getOutputArrayOffset()); + } + forwardIndex += fullNodeWidth; + } + + tokenizer.appendOutputArrayOffsets(outputArrayOffsets); + + return this; + } + + public void writeBytes(OutputStream os) throws IOException { + for (ColumnNodeWriter columnNodeWriter : columnNodeWriters) { + columnNodeWriter.writeBytes(os); + } + } + + + /************* get/set **************************/ + + public ArrayList getColumnNodeWriters() { + return columnNodeWriters; + } + + public int getNumBytes() { + return numBytes; + } + + public int getOutputArrayOffset(int sortedIndex) { + return outputArrayOffsets.get(sortedIndex); + } + + public ArrayList getNonLeaves() { + return nonLeaves; + } + + public ArrayList getLeaves() { + return leaves; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java new file mode 100644 index 0000000..963c307 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/CellTypeEncoder.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.other; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Detect if every KV has the same KeyValue.Type, in which case we don't need to store it for each + * KV. If(allSameType) during conversion to byte[], then we can store the "onlyType" in blockMeta, + * therefore not repeating it for each cell and saving 1 byte per cell. + */ +@InterfaceAudience.Private +public class CellTypeEncoder { + + /************* fields *********************/ + + protected boolean pendingFirstType = true; + protected boolean allSameType = true; + protected byte onlyType; + + + /************* construct *********************/ + + public void reset() { + pendingFirstType = true; + allSameType = true; + } + + + /************* methods *************************/ + + public void add(byte type) { + if (pendingFirstType) { + onlyType = type; + pendingFirstType = false; + } else if (onlyType != type) { + allSameType = false; + } + } + + + /**************** get/set **************************/ + + public boolean areAllSameType() { + return allSameType; + } + + public byte getOnlyType() { + return onlyType; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java new file mode 100644 index 0000000..959fdd8 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/other/LongEncoder.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.encode.other; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.HashSet; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.util.vint.UFIntTool; + +import com.google.common.base.Joiner; + +/** + * Used to de-duplicate, sort, minimize/diff, and serialize timestamps and mvccVersions from a + * collection of Cells. + * + * 1. add longs to a HashSet for fast de-duplication + * 2. keep track of the min and max + * 3. copy all values to a new long[] + * 4. Collections.sort the long[] + * 5. calculate maxDelta = max - min + * 6. determine FInt width based on maxDelta + * 7. PrefixTreeEncoder binary searches to find index of each value + */ +@InterfaceAudience.Private +public class LongEncoder { + + /****************** fields ****************************/ + + protected PrefixTreeBlockMeta blockMeta; + + protected HashSet uniqueValues; + protected long[] sortedUniqueValues; + protected long min, max, maxDelta; + + protected int bytesPerDelta; + protected int bytesPerIndex; + protected int totalCompressedBytes; + + + /****************** construct ****************************/ + + public LongEncoder(PrefixTreeBlockMeta blockMeta) { + this.blockMeta = blockMeta; + this.uniqueValues = new HashSet(); + } + + public void reset() { + uniqueValues.clear(); + sortedUniqueValues = null; + min = Long.MAX_VALUE; + max = Long.MIN_VALUE; + maxDelta = Long.MIN_VALUE; + bytesPerIndex = 0; + bytesPerDelta = 0; + totalCompressedBytes = 0; + } + + + /************* methods ***************************/ + + public void add(long timestamp) { + uniqueValues.add(timestamp); + } + + public LongEncoder compile() { + int numUnique = uniqueValues.size(); + if (numUnique == 1) { + min = CollectionUtils.getFirst(uniqueValues); + sortedUniqueValues = new long[] { min }; + return this; + } + + sortedUniqueValues = new long[numUnique]; + int lastIndex = -1; + for (long value : uniqueValues) { + sortedUniqueValues[++lastIndex] = value; + } + Arrays.sort(sortedUniqueValues); + min = ArrayUtils.getFirst(sortedUniqueValues); + max = ArrayUtils.getLast(sortedUniqueValues); + maxDelta = max - min; + if (maxDelta > 0) { + bytesPerDelta = UFIntTool.numBytes(maxDelta); + } else { + bytesPerDelta = 0; + } + + int maxIndex = numUnique - 1; + bytesPerIndex = UFIntTool.numBytes(maxIndex); + + totalCompressedBytes = numUnique * bytesPerDelta; + + return this; + } + + public long getDelta(int index) { + if (sortedUniqueValues.length == 0) { + return 0; + } + return sortedUniqueValues[index] - min; + } + + public int getIndex(long value) { + // should always find an exact match + return Arrays.binarySearch(sortedUniqueValues, value); + } + + public void writeBytes(OutputStream os) throws IOException { + for (int i = 0; i < sortedUniqueValues.length; ++i) { + long delta = sortedUniqueValues[i] - min; + UFIntTool.writeBytes(bytesPerDelta, delta, os); + } + } + + //convenience method for tests + public byte[] getByteArray() throws IOException{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + writeBytes(baos); + return baos.toByteArray(); + } + + public int getOutputArrayLength() { + return sortedUniqueValues.length * bytesPerDelta; + } + + public int getNumUniqueValues() { + return 
sortedUniqueValues.length; + } + + + /******************* Object methods **********************/ + + @Override + public String toString() { + if (ArrayUtils.isEmpty(sortedUniqueValues)) { + return "[]"; + } + return "[" + Joiner.on(",").join(ArrayUtils.toList(sortedUniqueValues)) + "]"; + } + + + /******************** get/set **************************/ + + public long getMin() { + return min; + } + + public int getBytesPerDelta() { + return bytesPerDelta; + } + + public int getBytesPerIndex() { + return bytesPerIndex; + } + + public int getTotalCompressedBytes() { + return totalCompressedBytes; + } + + public long[] getSortedUniqueTimestamps() { + return sortedUniqueValues; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java new file mode 100644 index 0000000..748a7f6 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowNodeWriter.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.row; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.util.vint.UFIntTool; +import org.apache.hbase.util.vint.UVIntTool; + +/** + * Serializes the fields comprising one node of the row trie, which can be a branch, nub, or leaf. + * Please see the write() method for the order in which data is written. 
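(Editor's note, not part of the patch: as a reading aid, the write() method below appears to emit one row node in this order:)

    token        UVInt length, then the token bytes (the root keeps its first byte, other nodes drop it)
    fan          UVInt fanOut, then the first byte of each child's token
    numCells     UVInt count of cells ending at this node (0 for pure branches)
    per cell     UFInt family offset, UFInt qualifier offset, UFInt timestamp index,
                 UFInt mvccVersion index, 1 type byte (only when types differ),
                 UFInt value offset, UFInt value length   (nubs and leaves only)
    children     one UFInt offset per child row node      (branches and nubs only)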
+ */ +@InterfaceAudience.Private +public class RowNodeWriter{ + protected static final Log LOG = LogFactory.getLog(RowNodeWriter.class); + + /********************* fields ******************************/ + + protected PrefixTreeEncoder prefixTreeEncoder; + protected PrefixTreeBlockMeta blockMeta; + protected TokenizerNode tokenizerNode; + + protected int tokenWidth; + protected int fanOut; + protected int numCells; + + protected int width; + + + /*********************** construct *************************/ + + public RowNodeWriter(PrefixTreeEncoder keyValueBuilder, TokenizerNode tokenizerNode) { + reconstruct(keyValueBuilder, tokenizerNode); + } + + public void reconstruct(PrefixTreeEncoder prefixTreeEncoder, TokenizerNode tokenizerNode) { + this.prefixTreeEncoder = prefixTreeEncoder; + reset(tokenizerNode); + } + + public void reset(TokenizerNode node) { + this.blockMeta = prefixTreeEncoder.getBlockMeta();// changes between blocks + this.tokenizerNode = node; + this.tokenWidth = 0; + this.fanOut = 0; + this.numCells = 0; + this.width = 0; + calculateOffsetsAndLengths(); + } + + + /********************* methods ****************************/ + + protected void calculateOffsetsAndLengths(){ + tokenWidth = tokenizerNode.getTokenLength(); + if(!tokenizerNode.isRoot()){ + --tokenWidth;//root has no parent + } + fanOut = CollectionUtils.nullSafeSize(tokenizerNode.getChildren()); + numCells = tokenizerNode.getNumOccurrences(); + } + + public int calculateWidth(){ + calculateWidthOverrideOffsetWidth(blockMeta.getNextNodeOffsetWidth()); + return width; + } + + public int calculateWidthOverrideOffsetWidth(int offsetWidth){ + width = 0; + width += UVIntTool.numBytes(tokenWidth); + width += tokenWidth; + + width += UVIntTool.numBytes(fanOut); + width += fanOut; + + width += UVIntTool.numBytes(numCells); + + if(tokenizerNode.hasOccurrences()){ + int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth() + + blockMeta.getQualifierOffsetWidth() + + blockMeta.getTimestampIndexWidth() + + blockMeta.getMvccVersionIndexWidth() + + blockMeta.getKeyValueTypeWidth() + + blockMeta.getValueOffsetWidth() + + blockMeta.getValueLengthWidth(); + width += numCells * fixedBytesPerCell; + } + + if( ! tokenizerNode.isLeaf()){ + width += fanOut * offsetWidth; + } + + return width; + } + + + /*********************** writing the compiled structure to the OutputStream ***************/ + + public void write(OutputStream os) throws IOException{ + //info about this row trie node + writeRowToken(os); + writeFan(os); + writeNumCells(os); + + //UFInt indexes and offsets for each cell in the row (if nub or leaf) + writeFamilyNodeOffsets(os); + writeQualifierNodeOffsets(os); + writeTimestampIndexes(os); + writeMvccVersionIndexes(os); + writeCellTypes(os); + writeValueOffsets(os); + writeValueLengths(os); + + //offsets to the children of this row trie node (if branch or nub) + writeNextRowTrieNodeOffsets(os); + } + + + /** + * Row node token, fan, and numCells. Written once at the beginning of each row node. These 3 + * fields can reproduce all the row keys that compose the block. + */ + + /** + * UVInt: tokenWidth + * bytes: token + */ + protected void writeRowToken(OutputStream os) throws IOException { + UVIntTool.writeBytes(tokenWidth, os); + int tokenStartIndex = tokenizerNode.isRoot() ? 
0 : 1; + ByteRangeTool.write(os, tokenizerNode.getToken(), tokenStartIndex); + } + + /** + * UVInt: numFanBytes/fanOut + * bytes: each fan byte + */ + public void writeFan(OutputStream os) throws IOException { + UVIntTool.writeBytes(fanOut, os); + if (fanOut <= 0) { + return; + } + ArrayList children = tokenizerNode.getChildren(); + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + os.write(child.getToken().get(0));// first byte of each child's token + } + } + + /** + * UVInt: numCells, the number of cells in this row which will be 0 for branch nodes + */ + protected void writeNumCells(OutputStream os) throws IOException { + UVIntTool.writeBytes(numCells, os); + } + + + /** + * The following methods write data for each cell in the row, mostly consisting of indexes or + * offsets into the timestamp/column data structures that are written in the middle of the block. + * We use {@link UFIntTool} to encode these indexes/offsets to allow random access during a binary + * search of a particular column/timestamp combination. + *

    + * Branch nodes will not have any data in these sections. + */ + + protected void writeFamilyNodeOffsets(OutputStream os) throws IOException { + if (blockMeta.getFamilyOffsetWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = PrefixTreeEncoder.MULITPLE_FAMILIES_POSSIBLE ? tokenizerNode + .getFirstInsertionIndex() + i : 0; + int sortedIndex = prefixTreeEncoder.getFamilySorter().getSortedIndexForInsertionId( + cellInsertionIndex); + int indexedFamilyOffset = prefixTreeEncoder.getFamilyWriter().getOutputArrayOffset( + sortedIndex); + UFIntTool.writeBytes(blockMeta.getFamilyOffsetWidth(), indexedFamilyOffset, os); + } + } + + protected void writeQualifierNodeOffsets(OutputStream os) throws IOException { + if (blockMeta.getQualifierOffsetWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + int sortedIndex = prefixTreeEncoder.getQualifierSorter().getSortedIndexForInsertionId( + cellInsertionIndex); + int indexedQualifierOffset = prefixTreeEncoder.getQualifierWriter().getOutputArrayOffset( + sortedIndex); + UFIntTool.writeBytes(blockMeta.getQualifierOffsetWidth(), indexedQualifierOffset, os); + } + } + + protected void writeTimestampIndexes(OutputStream os) throws IOException { + if (blockMeta.getTimestampIndexWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + long timestamp = prefixTreeEncoder.getTimestamps()[cellInsertionIndex]; + int timestampIndex = prefixTreeEncoder.getTimestampEncoder().getIndex(timestamp); + UFIntTool.writeBytes(blockMeta.getTimestampIndexWidth(), timestampIndex, os); + } + } + + protected void writeMvccVersionIndexes(OutputStream os) throws IOException { + if (blockMeta.getMvccVersionIndexWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + long mvccVersion = prefixTreeEncoder.getMvccVersions()[cellInsertionIndex]; + int mvccVersionIndex = prefixTreeEncoder.getMvccVersionEncoder().getIndex(mvccVersion); + UFIntTool.writeBytes(blockMeta.getMvccVersionIndexWidth(), mvccVersionIndex, os); + } + } + + protected void writeCellTypes(OutputStream os) throws IOException { + if (blockMeta.isAllSameType()) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + os.write(prefixTreeEncoder.getTypeBytes()[cellInsertionIndex]); + } + } + + protected void writeValueOffsets(OutputStream os) throws IOException { + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + long valueStartIndex = prefixTreeEncoder.getValueOffset(cellInsertionIndex); + UFIntTool.writeBytes(blockMeta.getValueOffsetWidth(), valueStartIndex, os); + } + } + + protected void writeValueLengths(OutputStream os) throws IOException { + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + int valueLength = prefixTreeEncoder.getValueLength(cellInsertionIndex); + UFIntTool.writeBytes(blockMeta.getValueLengthWidth(), valueLength, os); + } + } + + + /** + * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes. 
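(Editor's note, not part of the patch: the offsets written here are differences of "negative indexes", which RowSectionWriter assigns as the byte distance from a node's start back to the end of the row section. A quick check that the subtraction in the method below yields a forward distance: if the row section ends at byte E, a parent starting at byte p has negativeIndex E - p and a child starting at byte c has negativeIndex E - c, so parent minus child gives (E - p) - (E - c) = c - p, i.e. the number of bytes from the parent's start forward to the child's start.)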
+ */ + protected void writeNextRowTrieNodeOffsets(OutputStream os) throws IOException { + ArrayList children = tokenizerNode.getChildren(); + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + int distanceToChild = tokenizerNode.getNegativeIndex() - child.getNegativeIndex(); + UFIntTool.writeBytes(blockMeta.getNextNodeOffsetWidth(), distanceToChild, os); + } + } +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java new file mode 100644 index 0000000..03a099c --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/row/RowSectionWriter.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.row; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.util.vint.UFIntTool; + +import com.google.common.collect.Lists; + +/** + * Most of the complexity of the PrefixTree is contained in the "row section". It contains the row + * key trie structure used to search and recreate all the row keys. Each nub and leaf in this trie + * also contains references to offsets in the other sections of the data block that enable the + * decoder to match a row key with its qualifier, timestamp, type, value, etc. + *

    + * The row section is a concatenated collection of {@link RowNodeWriter}s. See that class for the + * internals of each row node. + */ +@InterfaceAudience.Private +public class RowSectionWriter { + + /***************** fields **************************/ + + protected PrefixTreeEncoder prefixTreeEncoder; + + protected PrefixTreeBlockMeta blockMeta; + + protected int numBytes; + protected int numAppended = 0;// for tests + + protected ArrayList nonLeaves; + protected ArrayList leaves; + + protected int numNonLeaves = 0; + protected int numLeaves = 0; + + protected ArrayList leafWriters; + protected ArrayList nonLeafWriters; + + protected int numLeafWriters; + protected int numNonLeafWriters; + + + /********************* construct **********************/ + + public RowSectionWriter() { + this.nonLeaves = Lists.newArrayList(); + this.leaves = Lists.newArrayList(); + this.leafWriters = Lists.newArrayList(); + this.nonLeafWriters = Lists.newArrayList(); + } + + public RowSectionWriter(PrefixTreeEncoder prefixTreeEncoder) { + reconstruct(prefixTreeEncoder); + } + + public void reconstruct(PrefixTreeEncoder prefixTreeEncoder) { + this.prefixTreeEncoder = prefixTreeEncoder; + this.blockMeta = prefixTreeEncoder.getBlockMeta(); + reset(); + } + + public void reset() { + numBytes = 0; + numAppended = 0; + nonLeaves.clear(); + leaves.clear(); + numNonLeaves = 0; + numLeaves = 0; + numLeafWriters = 0; + numNonLeafWriters = 0; + } + + + /****************** methods *******************************/ + + public RowSectionWriter compile() { + blockMeta.setMaxRowLength(prefixTreeEncoder.getRowTokenizer().getMaxElementLength()); + prefixTreeEncoder.getRowTokenizer().setNodeFirstInsertionIndexes(); + + prefixTreeEncoder.getRowTokenizer().appendNodes(nonLeaves, true, false); + numNonLeaves = nonLeaves.size(); + prefixTreeEncoder.getRowTokenizer().appendNodes(leaves, false, true); + numLeaves = leaves.size(); + + // track the starting position of each node in final output + int negativeIndex = 0; + + // create leaf writer nodes + // leaf widths are known at this point, so add them up + int totalLeafBytes = 0; + for (int i = leaves.size() - 1; i >= 0; --i) { + TokenizerNode leaf = leaves.get(i); + RowNodeWriter leafWriter = initializeWriter(leafWriters, numLeafWriters, leaf); + ++numLeafWriters; + // leaves store all but their first token byte + int leafNodeWidth = leafWriter.calculateWidthOverrideOffsetWidth(0); + totalLeafBytes += leafNodeWidth; + negativeIndex += leafNodeWidth; + leaf.setNegativeIndex(negativeIndex); + } + + int totalNonLeafBytesWithoutOffsets = 0; + int totalChildPointers = 0; + for (int i = nonLeaves.size() - 1; i >= 0; --i) { + TokenizerNode nonLeaf = nonLeaves.get(i); + RowNodeWriter nonLeafWriter = initializeWriter(nonLeafWriters, numNonLeafWriters, nonLeaf); + ++numNonLeafWriters; + totalNonLeafBytesWithoutOffsets += nonLeafWriter.calculateWidthOverrideOffsetWidth(0); + totalChildPointers += nonLeaf.getNumChildren(); + } + + // figure out how wide our offset FInts are + int offsetWidth = 0; + while (true) { + ++offsetWidth; + int offsetBytes = totalChildPointers * offsetWidth; + int totalRowBytes = totalNonLeafBytesWithoutOffsets + offsetBytes + totalLeafBytes; + if (totalRowBytes < UFIntTool.maxValueForNumBytes(offsetWidth)) { + // it fits + numBytes = totalRowBytes; + break; + } + } + blockMeta.setNextNodeOffsetWidth(offsetWidth); + + // populate negativeIndexes + for (int i = nonLeaves.size() - 1; i >= 0; --i) { + TokenizerNode nonLeaf = nonLeaves.get(i); + int writerIndex = 
numNonLeaves - i - 1; + RowNodeWriter nonLeafWriter = nonLeafWriters.get(writerIndex); + int nodeWidth = nonLeafWriter.calculateWidth(); + negativeIndex += nodeWidth; + nonLeaf.setNegativeIndex(negativeIndex); + } + + return this; + } + + protected RowNodeWriter initializeWriter(List list, int index, + TokenizerNode builderNode) { + RowNodeWriter rowNodeWriter = null; + //check if there is an existing node we can recycle + if (index >= list.size()) { + //there are not enough existing nodes, so add a new one which will be retrieved below + list.add(new RowNodeWriter(prefixTreeEncoder, builderNode)); + } + rowNodeWriter = list.get(index); + rowNodeWriter.reset(builderNode); + return rowNodeWriter; + } + + + public void writeBytes(OutputStream os) throws IOException { + for (int i = numNonLeafWriters - 1; i >= 0; --i) { + RowNodeWriter nonLeafWriter = nonLeafWriters.get(i); + nonLeafWriter.write(os); + } + // duplicates above... written more for clarity right now + for (int i = numLeafWriters - 1; i >= 0; --i) { + RowNodeWriter leafWriter = leafWriters.get(i); + leafWriter.write(os); + } + } + + + /***************** static ******************************/ + + protected static ArrayList filterByLeafAndReverse( + ArrayList ins, boolean leaves) { + ArrayList outs = Lists.newArrayList(); + for (int i = ins.size() - 1; i >= 0; --i) { + TokenizerNode n = ins.get(i); + if (n.isLeaf() && leaves || (!n.isLeaf() && !leaves)) { + outs.add(ins.get(i)); + } + } + return outs; + } + + + /************* get/set **************************/ + + public int getNumBytes() { + return numBytes; + } + + public ArrayList getNonLeaves() { + return nonLeaves; + } + + public ArrayList getLeaves() { + return leaves; + } + + public ArrayList getNonLeafWriters() { + return nonLeafWriters; + } + + public ArrayList getLeafWriters() { + return leafWriters; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java new file mode 100644 index 0000000..e1082e0 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenDepthComparator.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.tokenize; + +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Determines order of nodes in the output array. Maybe possible to optimize further. 
+ */ +@InterfaceAudience.Private +public class TokenDepthComparator implements Comparator { + + @Override + public int compare(TokenizerNode a, TokenizerNode b) { + if(a==null){ + throw new IllegalArgumentException("a cannot be null"); + } + if(b==null){ + throw new IllegalArgumentException("b cannot be null"); + } + + // put leaves at the end + if (!a.isLeaf() && b.isLeaf()) { + return -1; + } + if (a.isLeaf() && !b.isLeaf()) { + return 1; + } + + if (a.isLeaf() && b.isLeaf()) {// keep leaves in sorted order (for debugability) + return a.getId() < b.getId() ? -1 : 1; + } + + // compare depth + if (a.getTokenOffset() < b.getTokenOffset()) { + return -1; + } + if (a.getTokenOffset() > b.getTokenOffset()) { + return 1; + } + + // if same depth, return lower id first. ids are unique + return a.getId() < b.getId() ? -1 : 1; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java new file mode 100644 index 0000000..9b43c47 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/Tokenizer.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.tokenize; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionUtils; + +import com.google.common.collect.Lists; + +/** + * Data structure used in the first stage of PrefixTree encoding: + *

  • accepts a sorted stream of ByteRanges + *
  • splits them into a set of tokens, each held by a {@link TokenizerNode} + *
  • connects the TokenizerNodes via standard Java references + *
  • keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content + *
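A small sketch of this call pattern under stated assumptions: the inputs must already be sorted, a ByteRange is pointed at a byte[] via set(byte[], offset, length) as done elsewhere in this patch, and the sample row keys and class name are hypothetical.

import java.util.List;

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer;

// Hypothetical driver; the Tokenizer requires its inputs in sorted order.
class TokenizerSketch {
  List buildTrie() {
    Tokenizer tokenizer = new Tokenizer();
    for (String rowKey : new String[] { "AAA", "AAA", "AAB", "AABQQ" }) {
      byte[] bytes = Bytes.toBytes(rowKey);
      ByteRange range = new ByteRange();
      range.set(bytes, 0, bytes.length);
      tokenizer.addSorted(range); // grows the trie one token split at a time
    }
    return tokenizer.getArrays(); // re-materializes one byte[] per input occurrence
  }
}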


    + * Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier + * encoding. + */ +@InterfaceAudience.Private +public class Tokenizer{ + + /***************** fields **************************/ + + protected int numArraysAdded = 0; + protected long lastNodeId = -1; + protected ArrayList nodes; + protected int numNodes; + protected TokenizerNode root; + protected byte[] tokens; + protected int tokensLength; + + protected int maxElementLength = 0; + // number of levels in the tree assuming root level is 0 + protected int treeDepth = 0; + + + /******************* construct *******************/ + + public Tokenizer() { + this.nodes = Lists.newArrayList(); + this.tokens = new byte[0]; + } + + public void reset() { + numArraysAdded = 0; + lastNodeId = -1; + numNodes = 0; + tokensLength = 0; + root = null; + maxElementLength = 0; + treeDepth = 0; + } + + + /***************** building *************************/ + + public void addAll(ArrayList sortedByteRanges) { + for (int i = 0; i < sortedByteRanges.size(); ++i) { + ByteRange byteRange = sortedByteRanges.get(i); + addSorted(byteRange); + } + } + + public void addSorted(final ByteRange bytes) { + ++numArraysAdded; + if (bytes.getLength() > maxElementLength) { + maxElementLength = bytes.getLength(); + } + if (root == null) { + // nodeDepth of firstNode (non-root) is 1 + root = addNode(null, 1, 0, bytes, 0); + } else { + root.addSorted(bytes); + } + } + + public void incrementNumOccurrencesOfLatestValue(){ + CollectionUtils.getLast(nodes).incrementNumOccurrences(1); + } + + protected long nextNodeId() { + return ++lastNodeId; + } + + protected TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset, + final ByteRange token, int inputTokenOffset) { + int inputTokenLength = token.getLength() - inputTokenOffset; + int tokenOffset = appendTokenAndRepointByteRange(token, inputTokenOffset); + TokenizerNode node = null; + if (nodes.size() <= numNodes) { + node = new TokenizerNode(this, parent, nodeDepth, tokenStartOffset, tokenOffset, + inputTokenLength); + nodes.add(node); + } else { + node = nodes.get(numNodes); + node.reset(); + node.reconstruct(this, parent, nodeDepth, tokenStartOffset, tokenOffset, inputTokenLength); + } + ++numNodes; + return node; + } + + protected int appendTokenAndRepointByteRange(final ByteRange token, int inputTokenOffset) { + int newOffset = tokensLength; + int inputTokenLength = token.getLength() - inputTokenOffset; + int newMinimum = tokensLength + inputTokenLength; + tokens = ArrayUtils.growIfNecessary(tokens, newMinimum, 2 * newMinimum); + token.deepCopySubRangeTo(inputTokenOffset, inputTokenLength, tokens, tokensLength); + tokensLength += inputTokenLength; + return newOffset; + } + + protected void submitMaxNodeDepthCandidate(int nodeDepth) { + if (nodeDepth > treeDepth) { + treeDepth = nodeDepth; + } + } + + + /********************* read ********************/ + + public int getNumAdded(){ + return numArraysAdded; + } + + // for debugging + public ArrayList getNodes(boolean includeNonLeaves, boolean includeLeaves) { + ArrayList nodes = Lists.newArrayList(); + root.appendNodesToExternalList(nodes, includeNonLeaves, includeLeaves); + return nodes; + } + + public void appendNodes(List appendTo, boolean includeNonLeaves, + boolean includeLeaves) { + root.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves); + } + + public List getArrays() { + List nodes = new ArrayList(); + root.appendNodesToExternalList(nodes, true, true); + List byteArrays = 
Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(nodes)); + for (int i = 0; i < nodes.size(); ++i) { + TokenizerNode node = nodes.get(i); + for (int j = 0; j < node.getNumOccurrences(); ++j) { + byte[] byteArray = node.getNewByteArray(); + byteArrays.add(byteArray); + } + } + return byteArrays; + } + + //currently unused, but working and possibly useful in the future + public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, + int keyLength) { + root.getNode(resultHolder, key, keyOffset, keyLength); + } + + + /********************** write ***************************/ + + public Tokenizer setNodeFirstInsertionIndexes() { + root.setInsertionIndexes(0); + return this; + } + + public Tokenizer appendOutputArrayOffsets(List offsets) { + root.appendOutputArrayOffsets(offsets); + return this; + } + + + /********************* print/debug ********************/ + + protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING = false; + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getStructuralString()); + if (INCLUDE_FULL_TREE_IN_TO_STRING) { + for (byte[] bytes : getArrays()) { + if (sb.length() > 0) { + sb.append("\n"); + } + sb.append(Bytes.toString(bytes)); + } + } + return sb.toString(); + } + + public String getStructuralString() { + List nodes = getNodes(true, true); + StringBuilder sb = new StringBuilder(); + for (TokenizerNode node : nodes) { + String line = node.getPaddedTokenAndOccurrenceString(); + sb.append(line + "\n"); + } + return sb.toString(); + } + + + /****************** get/set ************************/ + + public TokenizerNode getRoot() { + return root; + } + + public int getMaxElementLength() { + return maxElementLength; + } + + public int getTreeDepth() { + return treeDepth; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java new file mode 100644 index 0000000..2b8a86c --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerNode.java @@ -0,0 +1,632 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.tokenize; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.Strings; + +import com.google.common.collect.Lists; + +/** + * Individual node in a Trie structure. Each node is one of 3 types: + *

  • Branch: an internal trie node that may have a token and must have multiple children, but does + * not represent an actual input byte[], hence its numOccurrences is 0 + *
  • Leaf: a node with no children and where numOccurrences is >= 1. Its token represents the + * last bytes in the input byte[]s. + *
  • Nub: a combination of a branch and leaf. Its token represents the last bytes of input + * byte[]s and has numOccurrences >= 1, but it also has child nodes which represent input byte[]s + * that add bytes to this node's input byte[]. + *

    + * Example inputs (numInputs=7):
    + * 0: AAA
    + * 1: AAA
    + * 2: AAB
    + * 3: AAB
    + * 4: AAB
    + * 5: AABQQ
    + * 6: AABQQ
    + *

    + * Resulting TokenizerNodes:
    + * AA <- branch, numOccurrences=0, tokenStartOffset=0, token.length=2
    + * A <- leaf, numOccurrences=2, tokenStartOffset=2, token.length=1
    + * B <- nub, numOccurrences=3, tokenStartOffset=2, token.length=1
    + * QQ <- leaf, numOccurrences=2, tokenStartOffset=3, token.length=2
    + *

    + * numInputs == 7 == sum(numOccurrences) == 0 + 2 + 3 + 2 + */ +@InterfaceAudience.Private +public class TokenizerNode{ + + /* + * Ref to data structure wrapper + */ + protected Tokenizer builder; + + /****************************************************************** + * Tree content/structure used during tokenization + * ****************************************************************/ + + /* + * ref to parent trie node + */ + protected TokenizerNode parent; + + /* + * node depth in trie, irrespective of each node's token length + */ + protected int nodeDepth; + + /* + * start index of this token in original byte[] + */ + protected int tokenStartOffset; + + /* + * bytes for this trie node. can be length 0 in root node + */ + protected ByteRange token; + + /* + * A count of occurrences in the input byte[]s, not the trie structure. 0 for branch nodes, 1+ for + * nubs and leaves. If the same byte[] is added to the trie multiple times, this is the only thing + * that changes in the tokenizer. As a result, duplicate byte[]s are very inexpensive to encode. + */ + protected int numOccurrences; + + /* + * The maximum fan-out of a byte[] trie is 256, so there are a maximum of 256 + * child nodes. + */ + protected ArrayList children; + + + /* + * Fields used later in the encoding process for sorting the nodes into the order they'll be + * written to the output byte[]. With these fields, the TokenizerNode and therefore Tokenizer + * are not generic data structures but instead are specific to HBase PrefixTree encoding. + */ + + /* + * unique id assigned to each TokenizerNode + */ + protected long id; + + /* + * set >=0 for nubs and leaves + */ + protected int firstInsertionIndex = -1; + + /* + * A positive value indicating how many bytes before the end of the block this node will start. If + * the section is 55 bytes and negativeOffset is 9, then the node will start at 46. + */ + protected int negativeIndex = 0; + + /* + * The offset in the output array at which to start writing this node's token bytes. Influenced + * by the lengths of all tokens sorted before this one. + */ + protected int outputArrayOffset = -1; + + + /*********************** construct *****************************/ + + public TokenizerNode(Tokenizer builder, TokenizerNode parent, int nodeDepth, + int tokenStartOffset, int tokenOffset, int tokenLength) { + this.token = new ByteRange(); + reconstruct(builder, parent, nodeDepth, tokenStartOffset, tokenOffset, tokenLength); + this.children = Lists.newArrayList(); + } + + /* + * Sub-constructor for initializing all fields without allocating a new object. Used by the + * regular constructor. + */ + public void reconstruct(Tokenizer builder, TokenizerNode parent, int nodeDepth, + int tokenStartOffset, int tokenOffset, int tokenLength) { + this.builder = builder; + this.id = builder.nextNodeId(); + this.parent = parent; + this.nodeDepth = nodeDepth; + builder.submitMaxNodeDepthCandidate(nodeDepth); + this.tokenStartOffset = tokenStartOffset; + this.token.set(builder.tokens, tokenOffset, tokenLength); + this.numOccurrences = 1; + } + + /* + * Clear the state of this node so that it looks like it was just allocated. + */ + public void reset() { + builder = null; + parent = null; + nodeDepth = 0; + tokenStartOffset = 0; + token.clear(); + numOccurrences = 0; + children.clear();// branches & nubs + + // ids/offsets. 
used during writing to byte[] + id = 0; + firstInsertionIndex = -1;// set >=0 for nubs and leaves + negativeIndex = 0; + outputArrayOffset = -1; + } + + + /************************* building *********************************/ + + /* + *
  • Only public method used during the tokenization process + *
  • Requires that the input ByteRange sort after the previous, and therefore after all previous + * inputs + *
  • Only looks at bytes of the input array that align with this node's token + */ + public void addSorted(final ByteRange bytes) {// recursively build the tree + + /* + * Recurse deeper into the existing trie structure + */ + if (matchesToken(bytes) && CollectionUtils.notEmpty(children)) { + TokenizerNode lastChild = CollectionUtils.getLast(children); + if (lastChild.partiallyMatchesToken(bytes)) { + lastChild.addSorted(bytes); + return; + } + } + + /* + * Recursion ended. We must either + *
  • 1: increment numOccurrences if this input was equal to the previous + *
  • 2: convert this node from a leaf to a nub, and add a new child leaf + *
  • 3: split this node into a branch and leaf, and then add a second leaf + */ + + // add it as a child of this node + int numIdenticalTokenBytes = numIdenticalBytes(bytes);// should be <= token.length + int tailOffset = tokenStartOffset + numIdenticalTokenBytes; + int tailLength = bytes.getLength() - tailOffset; + + if (numIdenticalTokenBytes == token.getLength()) { + if (tailLength == 0) {// identical to this node (case 1) + incrementNumOccurrences(1); + } else {// identical to this node, but with a few extra tailing bytes. (leaf -> nub) (case 2) + int childNodeDepth = nodeDepth + 1; + int childTokenStartOffset = tokenStartOffset + numIdenticalTokenBytes; + TokenizerNode newChildNode = builder.addNode(this, childNodeDepth, childTokenStartOffset, + bytes, tailOffset); + addChild(newChildNode); + } + } else {//numIdenticalBytes > 0, split into branch/leaf and then add second leaf (case 3) + split(numIdenticalTokenBytes, bytes); + } + } + + + protected void addChild(TokenizerNode node) { + node.setParent(this); + children.add(node); + } + + + /** + * Called when we need to convert a leaf node into a branch with 2 leaves. Comments inside the + * method assume we have token BAA starting at tokenStartOffset=0 and are adding BOO. The output + * will be 3 nodes:
    + *
  • 1: B <- branch + *
  • 2: AA <- leaf + *
  • 3: OO <- leaf + * + * @param numTokenBytesToRetain => 1 (the B) + * @param bytes => BOO + */ + protected void split(int numTokenBytesToRetain, final ByteRange bytes) { + int childNodeDepth = nodeDepth; + int childTokenStartOffset = tokenStartOffset + numTokenBytesToRetain; + + //create leaf AA + TokenizerNode firstChild = builder.addNode(this, childNodeDepth, childTokenStartOffset, + token, numTokenBytesToRetain); + firstChild.setNumOccurrences(numOccurrences);// do before clearing this node's numOccurrences + token.setLength(numTokenBytesToRetain);//shorten current token from BAA to B + numOccurrences = 0;//current node is now a branch + + moveChildrenToDifferentParent(firstChild);//point the new leaf (AA) to the new branch (B) + addChild(firstChild);//add the new leaf (AA) to the branch's (B's) children + + //create leaf OO + TokenizerNode secondChild = builder.addNode(this, childNodeDepth, childTokenStartOffset, + bytes, tokenStartOffset + numTokenBytesToRetain); + addChild(secondChild);//add the new leaf (00) to the branch's (B's) children + + // we inserted branch node B as a new level above/before the two children, so increment the + // depths of the children below + firstChild.incrementNodeDepthRecursively(); + secondChild.incrementNodeDepthRecursively(); + } + + + protected void incrementNodeDepthRecursively() { + ++nodeDepth; + builder.submitMaxNodeDepthCandidate(nodeDepth); + for (int i = 0; i < children.size(); ++i) { + children.get(i).incrementNodeDepthRecursively(); + } + } + + + protected void moveChildrenToDifferentParent(TokenizerNode newParent) { + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.setParent(newParent); + newParent.children.add(child); + } + children.clear(); + } + + + /************************ byte[] utils *************************/ + + protected boolean partiallyMatchesToken(ByteRange bytes) { + return numIdenticalBytes(bytes) > 0; + } + + protected boolean matchesToken(ByteRange bytes) { + return numIdenticalBytes(bytes) == getTokenLength(); + } + + protected int numIdenticalBytes(ByteRange bytes) { + return token.numEqualPrefixBytes(bytes, tokenStartOffset); + } + + + /***************** moving nodes around ************************/ + + public void appendNodesToExternalList(List appendTo, boolean includeNonLeaves, + boolean includeLeaves) { + if (includeNonLeaves && !isLeaf() || includeLeaves && isLeaf()) { + appendTo.add(this); + } + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.appendNodesToExternalList(appendTo, includeNonLeaves, includeLeaves); + } + } + + public int setInsertionIndexes(int nextIndex) { + int newNextIndex = nextIndex; + if (hasOccurrences()) { + setFirstInsertionIndex(nextIndex); + newNextIndex += numOccurrences; + } + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + newNextIndex = child.setInsertionIndexes(newNextIndex); + } + return newNextIndex; + } + + public void appendOutputArrayOffsets(List offsets) { + if (hasOccurrences()) { + offsets.add(outputArrayOffset); + } + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.appendOutputArrayOffsets(offsets); + } + } + + + /***************** searching *********************************/ + + /* + * Do a trie style search through the tokenizer. One option for looking up families or qualifiers + * during encoding, but currently unused in favor of tracking this information as they are added. 
+ * + * Keeping code pending further performance testing. + */ + public void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, + int keyLength) { + int thisNodeDepthPlusLength = tokenStartOffset + token.getLength(); + + // quick check if the key is shorter than this node (may not work for binary search) + if (CollectionUtils.isEmpty(children)) { + if (thisNodeDepthPlusLength < keyLength) {// ran out of bytes + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + } + + // all token bytes must match + for (int i = 0; i < token.getLength(); ++i) { + if (key[tokenStartOffset + keyOffset + i] != token.get(i)) { + // TODO return whether it's before or after so we can binary search + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + } + + if (thisNodeDepthPlusLength == keyLength && numOccurrences > 0) { + resultHolder.set(TokenizerRowSearchPosition.MATCH, this);// MATCH + return; + } + + if (CollectionUtils.notEmpty(children)) { + // TODO binary search the children + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + child.getNode(resultHolder, key, keyOffset, keyLength); + if (resultHolder.isMatch()) { + return; + } else if (resultHolder.getDifference() == TokenizerRowSearchPosition.BEFORE) { + // passed it, so it doesn't exist + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + // key is still AFTER the current node, so continue searching + } + } + + // checked all children (or there were no children), and didn't find it + resultHolder.set(TokenizerRowSearchPosition.NO_MATCH, null); + return; + } + + + /****************** writing back to byte[]'s *************************/ + + public byte[] getNewByteArray() { + byte[] arrayToFill = new byte[tokenStartOffset + token.getLength()]; + fillInBytes(arrayToFill); + return arrayToFill; + } + + public void fillInBytes(byte[] arrayToFill) { + for (int i = 0; i < token.getLength(); ++i) { + arrayToFill[tokenStartOffset + i] = token.get(i); + } + if (parent != null) { + parent.fillInBytes(arrayToFill); + } + } + + + /************************** printing ***********************/ + + @Override + public String toString() { + String s = ""; + if (parent == null) { + s += "R "; + } else { + s += getBnlIndicator(false) + " " + Bytes.toString(parent.getNewByteArray()); + } + s += "[" + Bytes.toString(token.deepCopyToNewArray()) + "]"; + if (numOccurrences > 0) { + s += "x" + numOccurrences; + } + return s; + } + + public String getPaddedTokenAndOccurrenceString() { + StringBuilder sb = new StringBuilder(); + sb.append(getBnlIndicator(true)); + sb.append(Strings.padFront(numOccurrences + "", ' ', 3)); + sb.append(Strings.padFront(nodeDepth + "", ' ', 3)); + if (outputArrayOffset >= 0) { + sb.append(Strings.padFront(outputArrayOffset + "", ' ', 3)); + } + sb.append(" "); + for (int i = 0; i < tokenStartOffset; ++i) { + sb.append(" "); + } + sb.append(Bytes.toString(token.deepCopyToNewArray()).replaceAll(" ", "_")); + return sb.toString(); + } + + public String getBnlIndicator(boolean indent) { + if (indent) { + if (isNub()) { + return " N "; + } + return isBranch() ? "B " : " L"; + } + if (isNub()) { + return "N"; + } + return isBranch() ? "B" : "L"; + } + + + /********************** count different node types ********************/ + + public int getNumBranchNodesIncludingThisNode() { + if (isLeaf()) { + return 0; + } + int totalFromThisPlusChildren = isBranch() ? 
1 : 0; + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + totalFromThisPlusChildren += child.getNumBranchNodesIncludingThisNode(); + } + return totalFromThisPlusChildren; + } + + public int getNumNubNodesIncludingThisNode() { + if (isLeaf()) { + return 0; + } + int totalFromThisPlusChildren = isNub() ? 1 : 0; + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + totalFromThisPlusChildren += child.getNumNubNodesIncludingThisNode(); + } + return totalFromThisPlusChildren; + } + + public int getNumLeafNodesIncludingThisNode() { + if (isLeaf()) { + return 1; + } + int totalFromChildren = 0; + for (int i = 0; i < children.size(); ++i) { + TokenizerNode child = children.get(i); + totalFromChildren += child.getNumLeafNodesIncludingThisNode(); + } + return totalFromChildren; + } + + + /*********************** simple read-only methods *******************************/ + + public int getNodeDepth() { + return nodeDepth; + } + + public int getTokenLength() { + return token.getLength(); + } + + public boolean hasOccurrences() { + return numOccurrences > 0; + } + + public boolean isRoot() { + return this.parent == null; + } + + public int getNumChildren() { + return CollectionUtils.nullSafeSize(children); + } + + public TokenizerNode getLastChild() { + if (CollectionUtils.isEmpty(children)) { + return null; + } + return CollectionUtils.getLast(children); + } + + public boolean isLeaf() { + return CollectionUtils.isEmpty(children) && hasOccurrences(); + } + + public boolean isBranch() { + return CollectionUtils.notEmpty(children) && !hasOccurrences(); + } + + public boolean isNub() { + return CollectionUtils.notEmpty(children) && hasOccurrences(); + } + + + /********************** simple mutation methods *************************/ + + /** + * Each occurrence > 1 indicates a repeat of the previous entry. This can be called directly by + * an external class without going through the process of detecting a repeat if it is a known + * repeat by some external mechanism. PtEncoder uses this when adding cells to a row if it knows + * the new cells are part of the current row. 
+ * @param d increment by this amount + */ + public void incrementNumOccurrences(int d) { + numOccurrences += d; + } + + + /************************* autogenerated get/set ******************/ + + public int getTokenOffset() { + return tokenStartOffset; + } + + public TokenizerNode getParent() { + return parent; + } + + public ByteRange getToken() { + return token; + } + + public int getNumOccurrences() { + return numOccurrences; + } + + public void setParent(TokenizerNode parent) { + this.parent = parent; + } + + public void setNumOccurrences(int numOccurrences) { + this.numOccurrences = numOccurrences; + } + + public ArrayList getChildren() { + return children; + } + + public long getId() { + return id; + } + + public int getFirstInsertionIndex() { + return firstInsertionIndex; + } + + public void setFirstInsertionIndex(int firstInsertionIndex) { + this.firstInsertionIndex = firstInsertionIndex; + } + + public int getNegativeIndex() { + return negativeIndex; + } + + public void setNegativeIndex(int negativeIndex) { + this.negativeIndex = negativeIndex; + } + + public int getOutputArrayOffset() { + return outputArrayOffset; + } + + public void setOutputArrayOffset(int outputArrayOffset) { + this.outputArrayOffset = outputArrayOffset; + } + + public void setId(long id) { + this.id = id; + } + + public void setBuilder(Tokenizer builder) { + this.builder = builder; + } + + public void setTokenOffset(int tokenOffset) { + this.tokenStartOffset = tokenOffset; + } + + public void setToken(ByteRange token) { + this.token = token; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java new file mode 100644 index 0000000..6494ba1 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchPosition.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.tokenize; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * Warning: currently unused, but code is valid. Pending performance testing on more data sets. + * + * Where is the key relative to our current position in the tree. 
For example, the current tree node + * is "BEFORE" the key we are seeking + */ +@InterfaceAudience.Private +public enum TokenizerRowSearchPosition { + + AFTER,//the key is after this tree node, so keep searching + BEFORE,//in a binary search, this tells us to back up + MATCH,//the current node is a full match + NO_MATCH,//might as well return a value more informative than null + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java new file mode 100644 index 0000000..e7f5433 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/encode/tokenize/TokenizerRowSearchResult.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.encode.tokenize; + +import org.apache.hadoop.classification.InterfaceAudience; + + +/** + * for recursively searching a PtBuilder + */ +@InterfaceAudience.Private +public class TokenizerRowSearchResult{ + + /************ fields ************************/ + + protected TokenizerRowSearchPosition difference; + protected TokenizerNode matchingNode; + + + /*************** construct *****************/ + + public TokenizerRowSearchResult() { + } + + public TokenizerRowSearchResult(TokenizerRowSearchPosition difference) { + this.difference = difference; + } + + public TokenizerRowSearchResult(TokenizerNode matchingNode) { + this.difference = TokenizerRowSearchPosition.MATCH; + this.matchingNode = matchingNode; + } + + + /*************** methods **********************/ + + public boolean isMatch() { + return TokenizerRowSearchPosition.MATCH == difference; + } + + + /************* get/set ***************************/ + + public TokenizerRowSearchPosition getDifference() { + return difference; + } + + public TokenizerNode getMatchingNode() { + return matchingNode; + } + + public void set(TokenizerRowSearchPosition difference, TokenizerNode matchingNode) { + this.difference = difference; + this.matchingNode = matchingNode; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScanner.java new file mode 100644 index 0000000..29c9ff4 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellScanner.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.scanner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.Cell; + +/** + * Alternate name may be CellInputStream + *

    + * An interface for iterating through a sequence of cells. Similar to Java's Iterator, but without + * the hasNext() or remove() methods. The hasNext() method is problematic because it may require + * actually loading the next object, which in turn requires storing the previous object somewhere. + * The core data block decoder should be as fast as possible, so we push the complexity and + * performance expense of concurrently tracking multiple cells to layers above the CellScanner. + *

    + * The getCurrentCell() method will return a reference to a Cell implementation. This reference may + * or may not point to a reusable cell implementation, so users of the CellScanner should not, for + * example, accumulate a List of Cells. All of the references may point to the same object, which + * would be the latest state of the underlying Cell. In short, the Cell is mutable. + *

    + * At a minimum, an implementation will need to be able to advance from one cell to the next in a + * LinkedList fashion. The nextQualifier(), nextFamily(), and nextRow() methods can all be + * implemented by calling nextCell(), however, if the DataBlockEncoding supports random access into + * the block then it may provide smarter versions of these methods. + *

    + * Typical usage: + * + *

    + * while (scanner.next()) {
    + *   Cell cell = scanner.getCurrent();
    + *   // do something
    + * }
    + * 
    + */ +@InterfaceAudience.Private +public interface CellScanner{ + + /** + * Reset any state in the scanner so it appears it was freshly opened. + */ + void resetToBeforeFirstEntry(); + + /** + * @return the current Cell which may be mutable + */ + Cell getCurrent(); + + /** + * Advance the scanner 1 cell. + * @return true if the next cell is found and getCurrentCell() will return a valid Cell + */ + boolean next(); + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java new file mode 100644 index 0000000..7c8269f --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/CellSearcher.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.scanner; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellScannerPosition; + +/** + * Methods for seeking to a random {@link Cell} inside a sorted collection of cells. Indicates that + * the implementation is able to navigate between cells without iterating through every cell. + */ +@InterfaceAudience.Private +public interface CellSearcher extends ReversibleCellScanner { + + /** + * Do everything within this scanner's power to find the key. Look forward and backwards. + *

    + * Abort as soon as we know it can't be found, possibly leaving the Searcher in an invalid state. + *

    + * @param key position the CellScanner exactly on this key + * @return true if the cell existed and getCurrentCell() holds a valid cell + */ + boolean positionAt(Cell key); + + /** + * Same as positionAt(..), but go to the extra effort of finding the previous key if there's no + * exact match. + *

    + * @param key position the CellScanner on this key or the closest cell before + * @return AT if exact match
    + * BEFORE if on last cell before key
    + * BEFORE_FIRST if key was before the first cell in this scanner's scope + */ + CellScannerPosition positionAtOrBefore(Cell key); + + /** + * Same as positionAt(..), but go to the extra effort of finding the next key if there's no exact + * match. + *

    + * @param key position the CellScanner on this key or the closest cell after + * @return AT if exact match
    + * AFTER if on first cell after key
    + * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition positionAtOrAfter(Cell key); + + /** + * Note: Added for backwards compatibility with + * {@link org.apache.hadoop.hbase.regionserver.KeyValueScanner#reseek} + *

    + * Look for the key, but only look after the current position. Probably not needed for an + * efficient tree implementation, but is important for implementations without random access such + * as unencoded KeyValue blocks. + *

    + * @param key position the CellScanner exactly on this key + * @return true if getCurrent() holds a valid cell + */ + boolean seekForwardTo(Cell key); + + /** + * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no + * exact match. + *

    + * @param key + * @return AT if exact match
    + * AFTER if on first cell after key
    + * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition seekForwardToOrBefore(Cell key); + + /** + * Same as seekForwardTo(..), but go to the extra effort of finding the next key if there's no + * exact match. + *

    + * @param key + * @return AT if exact match
    + * AFTER if on first cell after key
    + * AFTER_LAST if key was after the last cell in this scanner's scope + */ + CellScannerPosition seekForwardToOrAfter(Cell key); + + /** + * Note: This may not be appropriate to have in the interface. Need to investigate. + *

    + * Position the scanner in an invalid state after the last cell: CellScannerPosition.AFTER_LAST. + * This is used by tests and for handling certain edge cases. + */ + void positionAfterLastCell(); + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java new file mode 100644 index 0000000..9a6c37c --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/codec/prefixtree/scanner/ReversibleCellScanner.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.scanner; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * An extension of CellScanner indicating the scanner supports iterating backwards through cells. + *

    + * Note: This was not added to suggest that HBase should support client facing reverse Scanners, but + * because some {@link CellSearcher} implementations, namely PrefixTree, need a method of backing up + * if the positionAt(..) method goes past the requested cell. + */ +@InterfaceAudience.Private +public interface ReversibleCellScanner extends CellScanner { + + /** + * Try to position the scanner one Cell before the current position. + * @return true if the operation was successful, meaning getCurrentCell() will return a valid + * Cell.
    + * false if there were no previous cells, meaning getCurrentCell() will return null. + * Scanner position will be {@link org.apache.hbase.cell.CellScannerPosition#BEFORE_FIRST} + */ + boolean previous(); + + /** + * Try to position the scanner in the row before the current row. + * @param endOfRow true for the last cell in the previous row; false for the first cell + * @return true if the operation was successful, meaning getCurrentCell() will return a valid + * Cell.
    + * false if there were no previous cells, meaning getCurrentCell() will return null. + * Scanner position will be {@link org.apache.hbase.cell.CellScannerPosition#BEFORE_FIRST} + */ + boolean previousRow(boolean endOfRow); + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java new file mode 100644 index 0000000..b2d1526 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/ByteRangeSet.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.byterange; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ArrayUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; + +import com.google.common.collect.Lists; + +/** + * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted + * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage. + *

    + * Current implementations are {@link org.apache.hbase.util.byterange.impl.ByteRangeHashSet} and + * {@link org.apache.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a + * trie-oriented ByteRangeTrieSet, etc + */ +@InterfaceAudience.Private +public abstract class ByteRangeSet { + + /******************** fields **********************/ + + protected byte[] byteAppender; + protected int numBytes; + + protected Map uniqueIndexByUniqueRange; + + protected ArrayList uniqueRanges; + protected int numUniqueRanges = 0; + + protected int[] uniqueRangeIndexByInsertionId; + protected int numInputs; + + protected List sortedIndexByUniqueIndex; + protected int[] sortedIndexByInsertionId; + protected ArrayList sortedRanges; + + + /****************** construct **********************/ + + protected ByteRangeSet() { + this.byteAppender = new byte[0]; + this.uniqueRanges = Lists.newArrayList(); + this.uniqueRangeIndexByInsertionId = new int[0]; + this.sortedIndexByUniqueIndex = Lists.newArrayList(); + this.sortedIndexByInsertionId = new int[0]; + this.sortedRanges = Lists.newArrayList(); + } + + public void reset() { + numBytes = 0; + uniqueIndexByUniqueRange.clear(); + numUniqueRanges = 0; + numInputs = 0; + sortedIndexByUniqueIndex.clear(); + sortedRanges.clear(); + } + + + /*************** abstract *************************/ + + public abstract void addToSortedRanges(); + + + /**************** methods *************************/ + + /** + * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and + * insert it into the tracking Map uniqueIndexByUniqueRange. + */ + public void add(ByteRange bytes) { + Integer index = uniqueIndexByUniqueRange.get(bytes); + if (index == null) { + index = store(bytes); + } + int minLength = numInputs + 1; + uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId, + minLength, 2 * minLength); + uniqueRangeIndexByInsertionId[numInputs] = index; + ++numInputs; + } + + protected int store(ByteRange bytes) { + int indexOfNewElement = numUniqueRanges; + if (uniqueRanges.size() <= numUniqueRanges) { + uniqueRanges.add(new ByteRange()); + } + ByteRange storedRange = uniqueRanges.get(numUniqueRanges); + int neededBytes = numBytes + bytes.getLength(); + byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes); + bytes.deepCopyTo(byteAppender, numBytes); + storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet + numBytes += bytes.getLength(); + uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement); + int newestUniqueIndex = numUniqueRanges; + ++numUniqueRanges; + return newestUniqueIndex; + } + + public ByteRangeSet compile() { + addToSortedRanges(); + for (int i = 0; i < sortedRanges.size(); ++i) { + sortedIndexByUniqueIndex.add(null);// need to grow the size + } + // TODO move this to an invert(int[]) util method + for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) { + int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i)); + sortedIndexByUniqueIndex.set(uniqueIndex, i); + } + sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs, + numInputs); + for (int i = 0; i < numInputs; ++i) { + int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i]; + int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex); + sortedIndexByInsertionId[i] = sortedIndex; + } + return this; + } + + public int getSortedIndexForInsertionId(int insertionId) { + return 
sortedIndexByInsertionId[insertionId]; + } + + public int size() { + return uniqueIndexByUniqueRange.size(); + } + + + /***************** standard methods ************************/ + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + int i = 0; + for (ByteRange r : sortedRanges) { + if (i > 0) { + sb.append("\n"); + } + sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray())); + ++i; + } + sb.append("\ntotalSize:" + numBytes); + sb.append("\navgSize:" + getAvgSize()); + return sb.toString(); + } + + + /**************** get/set *****************************/ + + public ArrayList getSortedRanges() { + return sortedRanges; + } + + public long getAvgSize() { + return numBytes / numUniqueRanges; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java new file mode 100644 index 0000000..8787f39 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeHashSet.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.byterange.impl; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hbase.util.byterange.ByteRangeSet; + +/** + * This is probably the best implementation of ByteRangeSet at the moment, though a HashMap produces + * garbage when adding a new element to it. We can probably create a tighter implementation without + * pointers or garbage. 
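A small usage sketch under assumptions: the qualifier strings, variable names, and wrapper class are hypothetical, and the ByteRange is populated via set(byte[], offset, length) as in the rest of this patch; only methods defined on ByteRangeSet and ByteRangeHashSet are called.

import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.util.byterange.ByteRangeSet;
import org.apache.hbase.util.byterange.impl.ByteRangeHashSet;

// Hypothetical example: de-dupe unsorted qualifiers, then map insertion order to sorted order.
class ByteRangeHashSetSketch {
  void dedupe() {
    ByteRangeSet qualifiers = new ByteRangeHashSet();
    for (String q : new String[] { "b", "a", "b", "c" }) {
      byte[] bytes = Bytes.toBytes(q);
      ByteRange range = new ByteRange();
      range.set(bytes, 0, bytes.length);
      qualifiers.add(range);  // copies the bytes into the shared backing array
    }
    qualifiers.compile();     // sorts the unique ranges and builds the index mappings
    int numUnique = qualifiers.size();                            // 3 unique values from 4 inputs
    int sortedIndex = qualifiers.getSortedIndexForInsertionId(0); // where the first add() landed after sorting
  }
}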
+ */ +@InterfaceAudience.Private +public class ByteRangeHashSet extends ByteRangeSet { + + /************************ constructors *****************************/ + + public ByteRangeHashSet() { + this.uniqueIndexByUniqueRange = new HashMap(); + } + + public ByteRangeHashSet(List rawByteArrays) { + for (ByteRange in : IterableUtils.nullSafe(rawByteArrays)) { + add(in); + } + } + + @Override + public void addToSortedRanges() { + sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet())); + Collections.sort(sortedRanges); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java new file mode 100644 index 0000000..9499e56 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/byterange/impl/ByteRangeTreeSet.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.byterange.impl; + +import java.util.List; +import java.util.TreeMap; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hbase.util.byterange.ByteRangeSet; + +/** + * Not currently used in production, but here as a benchmark comparison against ByteRangeHashSet. + */ +@InterfaceAudience.Private +public class ByteRangeTreeSet extends ByteRangeSet { + + /************************ constructors *****************************/ + + public ByteRangeTreeSet() { + this.uniqueIndexByUniqueRange = new TreeMap(); + } + + public ByteRangeTreeSet(List rawByteArrays) { + this();//needed to initialize the TreeSet + for(ByteRange in : IterableUtils.nullSafe(rawByteArrays)){ + add(in); + } + } + + @Override + public void addToSortedRanges() { + sortedRanges.addAll(CollectionUtils.nullSafe(uniqueIndexByUniqueRange.keySet())); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java new file mode 100644 index 0000000..278ac55 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UFIntTool.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.vint; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * UFInt is an abbreviation for Unsigned Fixed-width Integer. + * + * This class converts between positive ints and 1-4 bytes that represent the int. All input ints + * must be positive. Max values stored in N bytes are: + * + * N=1: 2^8 => 256 + * N=2: 2^16 => 65,536 + * N=3: 2^24 => 16,777,216 + * N=4: 2^31 => 2,147,483,648 (Integer.MAX_VALUE) + * + * This was created to get most of the memory savings of a variable length integer when encoding + * an array of input integers, but to fix the number of bytes for each integer to the number needed + * to store the maximum integer in the array. This enables a binary search to be performed on the + * array of encoded integers. + * + * PrefixTree nodes often store offsets into a block that can fit into 1 or 2 bytes. Note that if + * the maximum value of an array of numbers needs 2 bytes, then it's likely that a majority of the + * numbers will also require 2 bytes. + * + * warnings: + * * no input validation for max performance + * * no negatives + */ +@InterfaceAudience.Private +public class UFIntTool { + + private static final int NUM_BITS_IN_LONG = 64; + + public static long maxValueForNumBytes(int numBytes) { + return (1L << (numBytes * 8)) - 1; + } + + public static int numBytes(final long value) { + if (value == 0) {// 0 doesn't work with the formula below + return 1; + } + return (NUM_BITS_IN_LONG + 7 - Long.numberOfLeadingZeros(value)) / 8; + } + + public static byte[] getBytes(int outputWidth, final long value) { + byte[] bytes = new byte[outputWidth]; + writeBytes(outputWidth, value, bytes, 0); + return bytes; + } + + public static void writeBytes(int outputWidth, final long value, byte[] bytes, int offset) { + bytes[offset + outputWidth - 1] = (byte) value; + for (int i = outputWidth - 2; i >= 0; --i) { + bytes[offset + i] = (byte) (value >>> (outputWidth - i - 1) * 8); + } + } + + private static final long[] MASKS = new long[] { + (long) 255, + (long) 255 << 8, + (long) 255 << 16, + (long) 255 << 24, + (long) 255 << 32, + (long) 255 << 40, + (long) 255 << 48, + (long) 255 << 56 + }; + + public static void writeBytes(int outputWidth, final long value, OutputStream os) throws IOException { + for (int i = outputWidth - 1; i >= 0; --i) { + os.write((byte) ((value & MASKS[i]) >>> (8 * i))); + } + } + + public static long fromBytes(final byte[] bytes) { + long value = 0; + value |= bytes[0] & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < bytes.length; ++i) { + value <<= 8; + value |= bytes[i] & 0xff; + } + return value; + } + + public static long fromBytes(final byte[] bytes, final int offset, final int width) { + long value = 0; + value |= bytes[0 + offset] & 0xff;// these seem to do ok without casting the byte to int + for (int i = 1; i < width; ++i) { + value <<= 8; + value |= bytes[i + offset] & 0xff; + } + return value; + } + +} diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java 
b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java new file mode 100644 index 0000000..c0d29e4 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVIntTool.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.vint; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left + * bit of the current byte is 1, then there is at least one more byte. + */ +@InterfaceAudience.Private +public class UVIntTool { + + public static final byte + BYTE_7_RIGHT_BITS_SET = 127, + BYTE_LEFT_BIT_SET = -128; + + public static final long + INT_7_RIGHT_BITS_SET = 127, + INT_8TH_BIT_SET = 128; + + public static final byte[] + MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, 7 }; + + /********************* int -> bytes **************************/ + + public static int numBytes(int in) { + if (in == 0) { + // doesn't work with the formula below + return 1; + } + return (38 - Integer.numberOfLeadingZeros(in)) / 7;// 38 comes from 32+(7-1) + } + + public static byte[] getBytes(int value) { + int numBytes = numBytes(value); + byte[] bytes = new byte[numBytes]; + int remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + // set the left bit + bytes[i] = (byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET); + remainder >>= 7; + } + // do not set the left bit + bytes[numBytes - 1] = (byte) (remainder & INT_7_RIGHT_BITS_SET); + return bytes; + } + + public static int writeBytes(int value, OutputStream os) throws IOException { + int numBytes = numBytes(value); + int remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + // set the left bit + os.write((byte) ((remainder & INT_7_RIGHT_BITS_SET) | INT_8TH_BIT_SET)); + remainder >>= 7; + } + // do not set the left bit + os.write((byte) (remainder & INT_7_RIGHT_BITS_SET)); + return numBytes; + } + + /******************** bytes -> int **************************/ + + public static int getInt(byte[] bytes) { + return getInt(bytes, 0); + } + + public static int getInt(byte[] bytes, int offset) { + int value = 0; + for (int i = 0;; ++i) { + byte b = bytes[offset + i]; + int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + if (b >= 0) { + break; + } + } + return value; + } + + public static int getInt(InputStream is) throws IOException { + int value = 0; + int i = 0; + int b; + do{ + b = is.read(); + int shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + ++i; + }while(b > Byte.MAX_VALUE); + return value; + } +} 
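A quick illustration of the two integer encodings above (a sketch only, using just the UVIntTool and UFIntTool methods added in this patch): the value 300 needs two bytes under both schemes; the variable-length form marks continuation in the high bit, while the fixed-width form zero-pads to a caller-chosen width so encoded arrays stay binary-searchable.

    int value = 300;                             // binary 1 0010 1100
    byte[] varint = UVIntTool.getBytes(value);   // { (byte) 0xAC, 0x02 }: 7 data bits per byte,
                                                 // high bit set on every byte except the last
    assert UVIntTool.numBytes(value) == 2 && UVIntTool.getInt(varint) == 300;
    byte[] fixed = UFIntTool.getBytes(2, value); // { 0x01, 0x2C }: big-endian, width chosen by caller
    assert UFIntTool.fromBytes(fixed) == 300L;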
diff --git a/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java new file mode 100644 index 0000000..ec95ae8 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hbase/util/vint/UVLongTool.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.vint; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Simple Variable Length Integer encoding. Left bit of 0 means we are on the last byte. If left + * bit of the current byte is 1, then there is at least one more byte. + */ +@InterfaceAudience.Private +public class UVLongTool{ + + public static final byte + BYTE_7_RIGHT_BITS_SET = 127, + BYTE_LEFT_BIT_SET = -128; + + public static final long + LONG_7_RIGHT_BITS_SET = 127, + LONG_8TH_BIT_SET = 128; + + public static final byte[] + MAX_VALUE_BYTES = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1, 127 }; + + + /********************* long -> bytes **************************/ + + public static int numBytes(long in) {// do a check for illegal arguments if not protected + if (in == 0) { + return 1; + }// doesn't work with the formula below + return (70 - Long.numberOfLeadingZeros(in)) / 7;// 70 comes from 64+(7-1) + } + + public static byte[] getBytes(long value) { + int numBytes = numBytes(value); + byte[] bytes = new byte[numBytes]; + long remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + bytes[i] = (byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET);// set the left bit + remainder >>= 7; + } + bytes[numBytes - 1] = (byte) (remainder & LONG_7_RIGHT_BITS_SET);// do not set the left bit + return bytes; + } + + public static int writeBytes(long value, OutputStream os) throws IOException { + int numBytes = numBytes(value); + long remainder = value; + for (int i = 0; i < numBytes - 1; ++i) { + // set the left bit + os.write((byte) ((remainder & LONG_7_RIGHT_BITS_SET) | LONG_8TH_BIT_SET)); + remainder >>= 7; + } + // do not set the left bit + os.write((byte) (remainder & LONG_7_RIGHT_BITS_SET)); + return numBytes; + } + + /******************** bytes -> long **************************/ + + public static long getLong(byte[] bytes) { + return getLong(bytes, 0); + } + + public static long getLong(byte[] bytes, int offset) { + long value = 0; + for (int i = 0;; ++i) { + byte b = bytes[offset + i]; + long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + if (b >= 0) { + break; + }// first bit was 0, so that's the last byte in the VarLong + } + return value; + } + + public static long getLong(InputStream is) throws 
IOException { + long value = 0; + int i = 0; + int b; + do { + b = is.read(); + long shifted = BYTE_7_RIGHT_BITS_SET & b;// kill leftmost bit + shifted <<= 7 * i; + value |= shifted; + ++i; + } while (b > Byte.MAX_VALUE); + return value; + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java new file mode 100644 index 0000000..e2e97a1 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/keyvalue/TestKeyValueTool.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.keyvalue; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hbase.codec.prefixtree.row.TestRowData; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestKeyValueTool { + + @Parameters + public static Collection parameters() { + return new TestRowData.InMemory().getAllAsObjectArray(); + } + + private TestRowData rows; + + public TestKeyValueTool(TestRowData testRows) { + this.rows = testRows; + } + + @Test + public void testRoundTripToBytes() { + List kvs = rows.getInputs(); + ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false); + List roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false); + Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray()); + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java new file mode 100644 index 0000000..04087ea --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/PrefixTreeTestConstants.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree; + +import org.apache.hadoop.hbase.util.Bytes; + +public class PrefixTreeTestConstants { + + public static final byte[] TEST_CF = Bytes.toBytes("cfDefault"); + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java new file mode 100644 index 0000000..688b65a --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/blockmeta/TestBlockMeta.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.blockmeta; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.junit.Assert; +import org.junit.Test; + +public class TestBlockMeta { + + static int BLOCK_START = 123; + + private static PrefixTreeBlockMeta createSample() { + PrefixTreeBlockMeta m = new PrefixTreeBlockMeta(); + m.setNumMetaBytes(0); + m.setNumKeyValueBytes(3195); + + m.setNumRowBytes(0); + m.setNumFamilyBytes(3); + m.setNumQualifierBytes(12345); + m.setNumTimestampBytes(23456); + m.setNumMvccVersionBytes(5); + m.setNumValueBytes(34567); + + m.setNextNodeOffsetWidth(3); + m.setFamilyOffsetWidth(1); + m.setQualifierOffsetWidth(2); + m.setTimestampIndexWidth(1); + m.setMvccVersionIndexWidth(2); + m.setValueOffsetWidth(8); + m.setValueLengthWidth(3); + + m.setRowTreeDepth(11); + m.setMaxRowLength(200); + m.setMaxQualifierLength(50); + + m.setMinTimestamp(1318966363481L); + m.setTimestampDeltaWidth(3); + m.setMinMvccVersion(100L); + m.setMvccVersionDeltaWidth(4); + + m.setAllSameType(false); + m.setAllTypes(KeyValue.Type.Delete.getCode()); + + m.setNumUniqueRows(88); + m.setNumUniqueFamilies(1); + m.setNumUniqueQualifiers(56); + return m; + } + + @Test + public void testStreamSerialization() throws IOException { + PrefixTreeBlockMeta original = createSample(); + ByteArrayOutputStream os = new ByteArrayOutputStream(10000); + original.writeVariableBytesToOutputStream(os); + ByteBuffer buffer = ByteBuffer.wrap(os.toByteArray()); + PrefixTreeBlockMeta roundTripped = new PrefixTreeBlockMeta(buffer); + Assert.assertTrue(original.equals(roundTripped)); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java new file mode 100644 index 0000000..2be40dd --- /dev/null +++ 
b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizer.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.builder; + +import java.util.Collection; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerRowSearchResult; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestTokenizer { + + @Parameters + public static Collection parameters() { + return new TestTokenizerData.InMemory().getAllAsObjectArray(); + } + + private List inputs; + private Tokenizer builder; + private List roundTripped; + + public TestTokenizer(TestTokenizerData sortedByteArrays) { + this.inputs = sortedByteArrays.getInputs(); + this.builder = new Tokenizer(); + for (byte[] array : inputs) { + builder.addSorted(new ByteRange(array)); + } + this.roundTripped = builder.getArrays(); + } + + @Test + public void testReaderRoundTrip() { + Assert.assertEquals(inputs.size(), roundTripped.size()); + Assert.assertTrue(Bytes.isSorted(roundTripped)); + Assert.assertTrue(Bytes.equals(inputs, roundTripped)); + } + + @Test + public void testSearching() { + for (byte[] input : inputs) { + TokenizerRowSearchResult resultHolder = new TokenizerRowSearchResult(); + builder.getNode(resultHolder, input, 0, input.length); + TokenizerNode n = resultHolder.getMatchingNode(); + byte[] output = n.getNewByteArray(); + Assert.assertTrue(Bytes.equals(input, output)); + } + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java new file mode 100644 index 0000000..b6db64a --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTokenizerData.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.builder; + +import java.util.Collection; +import java.util.List; + +import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataBasic; +import org.apache.hbase.codec.prefixtree.builder.data.TestTokenizerDataEdgeCase; + +import com.google.common.collect.Lists; + +public interface TestTokenizerData { + + List getInputs(); + List getOutputs(); + + public static class InMemory { + public Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + all.add(new Object[] { new TestTokenizerDataBasic() }); + all.add(new Object[] { new TestTokenizerDataEdgeCase() }); + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java new file mode 100644 index 0000000..8049562 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/TestTreeDepth.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.builder; + +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.junit.Test; +import org.mortbay.log.Log; + +import com.google.common.collect.Lists; + +public class TestTreeDepth { + + @Test + public void testSingleNode() { + List inputs = Lists.newArrayList("a"); + testInternal(inputs, 1); + } + + @Test + public void testSimpleBranch() { + List inputs = Lists.newArrayList("a", "aa", "ab"); + testInternal(inputs, 2); + } + + @Test + public void testEmptyRoot() { + List inputs = Lists.newArrayList("a", "b"); + testInternal(inputs, 2); + } + + @Test + public void testRootAsNub() { + List inputs = Lists.newArrayList("a", "aa"); + testInternal(inputs, 2); + } + + @Test + public void testRootAsNubPlusNub() { + List inputs = Lists.newArrayList("a", "aa", "aaa"); + testInternal(inputs, 3); + } + + @Test + public void testEmptyRootPlusNub() { + List inputs = Lists.newArrayList("a", "aa", "b"); + testInternal(inputs, 3); + } + + @Test + public void testSplitDistantAncestor() { + List inputs = Lists.newArrayList("a", "ac", "acd", "b"); + testInternal(inputs, 4); + } + + protected void testInternal(List inputs, int expectedTreeDepth) { + Log.warn("init logger"); + Tokenizer builder = new Tokenizer(); + for (String s : inputs) { + ByteRange b = new ByteRange(Bytes.toBytes(s)); + builder.addSorted(b); + } + Assert.assertEquals(1, builder.getRoot().getNodeDepth()); + Assert.assertEquals(expectedTreeDepth, builder.getTreeDepth()); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java new file mode 100644 index 0000000..d717999 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataBasic.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.builder.data; + +import java.util.List; + +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData; + +import com.google.common.collect.Lists; + +public class TestTokenizerDataBasic implements TestTokenizerData { + + static List d = Lists.newArrayList(); + static { + List s = Lists.newArrayList(); + s.add("abc");// nub + s.add("abcde");// leaf + s.add("bbc");// causes root to split and have empty token + s.add("bbc");// makes numOccurrences=2 on the bbc node + s.add("cd");// just to get another node after the numOccurrences=2 + d = Bytes.getUtf8ByteArrays(s); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public List getOutputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java new file mode 100644 index 0000000..4c26649 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/builder/data/TestTokenizerDataEdgeCase.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.builder.data; + +import java.util.List; + +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.builder.TestTokenizerData; + +import com.google.common.collect.Lists; + +public class TestTokenizerDataEdgeCase implements TestTokenizerData { + + static List d = Lists.newArrayList(); + static { + /* + * tricky little combination because the acegi token will partially match abdfi, but when you + * descend into abdfi, it will not fully match + */ + List s = Lists.newArrayList(); + s.add("abdfh"); + s.add("abdfi"); + s.add("acegi"); + d = Bytes.getUtf8ByteArrays(s); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public List getOutputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java new file mode 100644 index 0000000..d006fee --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnBuilder.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.column; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.decode.column.ColumnReader; +import org.apache.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; +import org.apache.hbase.codec.prefixtree.encode.tokenize.Tokenizer; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.google.common.collect.Lists; + +@RunWith(Parameterized.class) +public class TestColumnBuilder { + + @Parameters + public static Collection parameters() { + return new TestColumnData.InMemory().getAllAsObjectArray(); + } + + /*********** fields **********************************/ + + protected TestColumnData columns; + protected ByteRangeTreeSet columnSorter; + protected List sortedUniqueColumns; + protected PrefixTreeBlockMeta blockMeta; + protected Tokenizer builder; + protected ColumnSectionWriter writer; + protected byte[] bytes; + protected byte[] buffer; + protected ColumnReader reader; + + /*************** construct ****************************/ + + public TestColumnBuilder(TestColumnData columns) { + this.columns = columns; + List inputs = columns.getInputs(); + this.columnSorter = new ByteRangeTreeSet(inputs); + this.sortedUniqueColumns = columnSorter.compile().getSortedRanges(); + List copies = ByteRangeTool.copyToNewArrays(sortedUniqueColumns); + Assert.assertTrue(Bytes.isSorted(copies)); + this.blockMeta = new PrefixTreeBlockMeta(); + this.blockMeta.setNumMetaBytes(0); + this.blockMeta.setNumRowBytes(0); + this.builder = new Tokenizer(); + } + + /************* methods ********************************/ + + @Test + public void testReaderRoundTrip() throws IOException { + for (int i = 0; i < sortedUniqueColumns.size(); ++i) { + ByteRange column = sortedUniqueColumns.get(i); + builder.addSorted(column); + } + List builderOutputArrays = builder.getArrays(); + for (int i = 0; i < builderOutputArrays.size(); ++i) { + byte[] inputArray = sortedUniqueColumns.get(i).deepCopyToNewArray(); + byte[] outputArray = builderOutputArrays.get(i); + boolean same = Bytes.equals(inputArray, outputArray); + Assert.assertTrue(same); + } + Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size()); + + writer = new ColumnSectionWriter(blockMeta, builder, false); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + writer.compile().writeBytes(baos); + bytes = baos.toByteArray(); 
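+ // the serialized column section is complete; read it back through a ColumnReader and check that
+ // every de-duplicated qualifier round-trips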
+ buffer = new byte[blockMeta.getMaxQualifierLength()]; + reader = new ColumnReader(buffer, false); + reader.initOnBlock(blockMeta, bytes); + + List builderNodes = Lists.newArrayList(); + builder.appendNodes(builderNodes, true, true); + int i = 0; + for (TokenizerNode builderNode : builderNodes) { + if (!builderNode.hasOccurrences()) { + continue; + } + Assert.assertEquals(1, builderNode.getNumOccurrences());// we de-duped before adding to + // builder + int position = builderNode.getOutputArrayOffset(); + byte[] output = reader.populateBuffer(position).copyBufferToNewArray(); + boolean same = Bytes.equals(sortedUniqueColumns.get(i).deepCopyToNewArray(), output); + Assert.assertTrue(same); + ++i; + } + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java new file mode 100644 index 0000000..522a8ad --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/TestColumnData.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.column; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataRandom; +import org.apache.hbase.codec.prefixtree.column.data.TestColumnDataSimple; + +import com.google.common.collect.Lists; + +public interface TestColumnData { + + List getInputs(); + List getOutputs(); + + public static class InMemory { + public Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + all.add(new Object[] { new TestColumnDataSimple() }); + for (int leftShift = 0; leftShift < 16; ++leftShift) { + all.add(new Object[] { new TestColumnDataRandom(1 << leftShift) }); + } + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java new file mode 100644 index 0000000..7a66a73 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataRandom.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.column.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; +import org.apache.hbase.codec.prefixtree.column.TestColumnData; +import org.apache.hbase.util.byterange.ByteRangeSet; +import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; + +import com.google.common.collect.Lists; + +public class TestColumnDataRandom implements TestColumnData { + + private List inputs = Lists.newArrayList(); + private List outputs = Lists.newArrayList(); + + public TestColumnDataRandom(int numColumns) { + RedundantKVGenerator generator = new RedundantKVGenerator(); + ByteRangeSet sortedColumns = new ByteRangeTreeSet(); + List d = generator.generateTestKeyValues(numColumns); + for (KeyValue col : d) { + ByteRange colRange = new ByteRange(col.getQualifier()); + inputs.add(colRange); + sortedColumns.add(colRange); + } + for (ByteRange col : sortedColumns.compile().getSortedRanges()) { + outputs.add(col); + } + } + + @Override + public List getInputs() { + return inputs; + } + + @Override + public List getOutputs() { + return outputs; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java new file mode 100644 index 0000000..ad5a2cd --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/column/data/TestColumnDataSimple.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.column.data; + +import java.util.List; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.ByteRangeTool; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.column.TestColumnData; + +import com.google.common.collect.Lists; + +public class TestColumnDataSimple implements TestColumnData { + + @Override + public List getInputs() { + List d = Lists.newArrayList(); + d.add("abc"); + d.add("abcde"); + d.add("abc"); + d.add("bbc"); + d.add("abc"); + return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); + } + + @Override + public List getOutputs() { + List d = Lists.newArrayList(); + d.add("abc"); + d.add("abcde"); + d.add("bbc"); + return ByteRangeTool.fromArrays(Bytes.getUtf8ByteArrays(d)); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java new file mode 100644 index 0000000..1df6b05 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/BaseTestRowData.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +public abstract class BaseTestRowData implements TestRowData { + + @Override + public List getRowStartIndexes() { + List rowStartIndexes = Lists.newArrayList(); + rowStartIndexes.add(0); + List inputs = getInputs(); + for (int i = 1; i < inputs.size(); ++i) { + KeyValue lastKv = inputs.get(i - 1); + KeyValue kv = inputs.get(i); + if (!CellComparator.equalsRow(lastKv, kv)) { + rowStartIndexes.add(i); + } + } + return rowStartIndexes; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java new file mode 100644 index 0000000..e51235d --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestPrefixTreeSearcher.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTool; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.codec.prefixtree.decode.DecoderFactory; +import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestPrefixTreeSearcher { + + protected static int BLOCK_START = 7; + + @Parameters + public static Collection parameters() { + return new TestRowData.InMemory().getAllAsObjectArray(); + } + + protected TestRowData rows; + protected ByteBuffer block; + + public TestPrefixTreeSearcher(TestRowData testRows) throws IOException { + this.rows = testRows; + ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20); + PrefixTreeEncoder kvBuilder = new PrefixTreeEncoder(os, true); + for (KeyValue kv : rows.getInputs()) { + kvBuilder.write(kv); + } + kvBuilder.flush(); + byte[] outputBytes = os.toByteArray(); + this.block = ByteBuffer.wrap(outputBytes); + } + + + @Test + public void testScanForwards() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + + int i = -1; + while (searcher.next()) { + ++i; + KeyValue inputCell = rows.getInputs().get(i); + Cell outputCell = searcher.getCurrent(); + + // check all 3 permutations of equals() + Assert.assertEquals(inputCell, outputCell); + Assert.assertEquals(outputCell, inputCell); + Assert.assertTrue(CellComparator.equals(inputCell, outputCell)); + } + Assert.assertEquals(rows.getInputs().size(), i + 1); + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Test + public void testScanBackwards() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + searcher.positionAfterLastCell(); + int i = -1; + while (searcher.previous()) { + ++i; + int oppositeIndex = rows.getInputs().size() - i - 1; + KeyValue inputKv = rows.getInputs().get(oppositeIndex); + KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent()); + Assert.assertEquals(inputKv, outputKv); + } + Assert.assertEquals(rows.getInputs().size(), i + 1); + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Test + public void testRandomSeekHits() throws IOException { + 
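// every input KeyValue must be reported as an exact hit, and the Cell found there must equal the input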
CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + for (KeyValue kv : rows.getInputs()) { + boolean hit = searcher.positionAt(kv); + Assert.assertTrue(hit); + Cell foundKv = searcher.getCurrent(); + Assert.assertTrue(CellComparator.equals(kv, foundKv)); + } + } finally { + DecoderFactory.checkIn(searcher); + } + } + + /** + * very hard to test nubs with this thing since the a nextRowKey function will usually skip them + */ + @Test + public void testRandomSeekMisses() throws IOException { + CellSearcher searcher = null; + List rowStartIndexes = rows.getRowStartIndexes(); + try { + searcher = DecoderFactory.checkOut(block, true); + for (int i=0; i < rows.getInputs().size(); ++i) { + KeyValue kv = rows.getInputs().get(i); + + //nextRow + KeyValue inputNextRow = KeyValueTool.createFirstKeyInNextRow(kv); + + CellScannerPosition position = searcher.positionAtOrBefore(inputNextRow); + boolean isFirstInRow = rowStartIndexes.contains(i); + if(isFirstInRow){ + int rowIndex = rowStartIndexes.indexOf(i); + if(rowIndex < rowStartIndexes.size() - 1){ +// int lastKvInRowI = rowStartIndexes.get(rowIndex + 1) - 1; + Assert.assertEquals(CellScannerPosition.BEFORE, position); + /* + * Can't get this to work between nubs like rowB\x00 <-> rowBB + * + * No reason to doubt that it works, but will have to come up with a smarter test. + */ +// Assert.assertEquals(rows.getInputs().get(lastKvInRowI), searcher.getCurrentCell()); + } + } + + //previous KV + KeyValue inputPreviousKv = KeyValueTool.previousKey(kv); + boolean hit = searcher.positionAt(inputPreviousKv); + Assert.assertFalse(hit); + position = searcher.positionAtOrAfter(inputPreviousKv); + if(CollectionUtils.isLastIndex(rows.getInputs(), i)){ + Assert.assertTrue(CellScannerPosition.AFTER_LAST == position); + }else{ + Assert.assertTrue(CellScannerPosition.AFTER == position); + /* + * TODO: why i+1 instead of i? + */ + Assert.assertEquals(rows.getInputs().get(i+1), searcher.getCurrent()); + } + } + } finally { + DecoderFactory.checkIn(searcher); + } + } + + + @Test + public void testRandomSeekIndividualAssertions() throws IOException { + CellSearcher searcher = null; + try { + searcher = DecoderFactory.checkOut(block, true); + rows.individualSearcherAssertions(searcher); + } finally { + DecoderFactory.checkIn(searcher); + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java new file mode 100644 index 0000000..92ff672 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowData.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row; + +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataComplexQualifiers; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDeeper; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataDifferentTimestamps; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataEmpty; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataExerciseFInts; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataMultiFamilies; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNub; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSimple; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataTrivial; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrls; +import org.apache.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +/* + * A master class for registering different implementations of TestRowData. + */ +public interface TestRowData { + + List getInputs(); + List getRowStartIndexes(); + + void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta); + + void individualSearcherAssertions(CellSearcher searcher); + + public static class InMemory { + + /* + * The following are different styles of data that the codec may encounter. Having these small + * representations of the data helps pinpoint what is wrong if the encoder breaks. 
+ */ + public static Collection getAll() { + List all = Lists.newArrayList(); + //simple + all.add(new TestRowDataEmpty()); + all.add(new TestRowDataTrivial()); + all.add(new TestRowDataSimple()); + all.add(new TestRowDataDeeper()); + + //more specific + all.add(new TestRowDataSingleQualifier()); + all.add(new TestRowDataMultiFamilies()); + all.add(new TestRowDataNub()); + all.add(new TestRowDataSearcherRowMiss()); + all.add(new TestRowDataQualifierByteOrdering()); + all.add(new TestRowDataComplexQualifiers()); + all.add(new TestRowDataDifferentTimestamps()); + + //larger data volumes (hard to debug) + all.add(new TestRowDataNumberStrings()); + all.add(new TestRowDataUrls()); + all.add(new TestRowDataUrlsExample()); + all.add(new TestRowDataExerciseFInts()); + all.add(new TestRowDataRandomKeyValues()); + return all; + } + + public static Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + for (TestRowData testRows : getAll()) { + all.add(new Object[] { testRows }); + } + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java new file mode 100644 index 0000000..76c5a55 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/TestRowEncoder.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTool; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.Cell; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; +import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import com.google.common.collect.Lists; + +@RunWith(Parameterized.class) +public class TestRowEncoder { + + protected static int BLOCK_START = 7; + + @Parameters + public static Collection parameters() { + List parameters = Lists.newArrayList(); + for (TestRowData testRows : TestRowData.InMemory.getAll()) { + parameters.add(new Object[] { testRows }); + } + return parameters; + } + + protected TestRowData rows; + protected List inputKvs; + protected boolean includeMemstoreTS = true; + protected ByteArrayOutputStream os; + protected PrefixTreeEncoder encoder; + protected int totalBytes; + protected PrefixTreeBlockMeta blockMetaWriter; + protected byte[] outputBytes; + protected ByteBuffer buffer; + protected ByteArrayInputStream is; + protected PrefixTreeBlockMeta blockMetaReader; + protected byte[] inputBytes; + protected PrefixTreeArraySearcher searcher; + + public TestRowEncoder(TestRowData testRows) { + this.rows = testRows; + } + + @Before + public void compile() throws IOException { + os = new ByteArrayOutputStream(1 << 20); + encoder = new PrefixTreeEncoder(os, includeMemstoreTS); + + inputKvs = rows.getInputs(); + for (KeyValue kv : inputKvs) { + encoder.write(kv); + } + encoder.flush(); + totalBytes = encoder.getTotalBytes(); + blockMetaWriter = encoder.getBlockMeta(); + outputBytes = os.toByteArray(); + + // start reading, but save the assertions for @Test methods + buffer = ByteBuffer.wrap(outputBytes); + blockMetaReader = new PrefixTreeBlockMeta(buffer); + + searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(), + blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength()); + searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS); + } + + @Test + public void testEncoderOutput() throws IOException { + Assert.assertEquals(totalBytes, outputBytes.length); + Assert.assertEquals(blockMetaWriter, blockMetaReader); + } + + @Test + public void testForwardScanner() { + int counter = -1; + while (searcher.next()) { + ++counter; + KeyValue inputKv = rows.getInputs().get(counter); + KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent()); + assertKeyAndValueEqual(inputKv, outputKv); + } + // assert same number of cells + Assert.assertEquals(rows.getInputs().size(), counter + 1); + } + + + /** + * probably not needed since testReverseScannerWithJitter() below is more thorough + */ + @Test + public void testReverseScanner() { + searcher.positionAfterLastCell(); + int counter = -1; + while (searcher.previous()) { + ++counter; + int oppositeIndex = rows.getInputs().size() - counter - 1; + KeyValue inputKv = rows.getInputs().get(oppositeIndex); + KeyValue outputKv = 
KeyValueTool.copyToNewKeyValue(searcher.getCurrent()); + assertKeyAndValueEqual(inputKv, outputKv); + } + Assert.assertEquals(rows.getInputs().size(), counter + 1); + } + + + /** + * Exercise the nubCellsRemain variable by calling next+previous. NubCellsRemain is basically + * a special fan index. + */ + @Test + public void testReverseScannerWithJitter() { + searcher.positionAfterLastCell(); + int counter = -1; + while (true) { + boolean foundCell = searcher.previous(); + if (!foundCell) { + break; + } + ++counter; + + // a next+previous should cancel out + if (!searcher.isAfterLast()) { + searcher.next(); + searcher.previous(); + } + + int oppositeIndex = rows.getInputs().size() - counter - 1; + KeyValue inputKv = rows.getInputs().get(oppositeIndex); + KeyValue outputKv = KeyValueTool.copyToNewKeyValue(searcher.getCurrent()); + assertKeyAndValueEqual(inputKv, outputKv); + } + Assert.assertEquals(rows.getInputs().size(), counter + 1); + } + + @Test + public void testIndividualBlockMetaAssertions() { + rows.individualBlockMetaAssertions(blockMetaReader); + } + + + /**************** helper **************************/ + + protected void assertKeyAndValueEqual(Cell expected, Cell actual) { + // assert keys are equal (doesn't compare values) + Assert.assertEquals(expected, actual); + if (includeMemstoreTS) { + Assert.assertEquals(expected.getMvccVersion(), actual.getMvccVersion()); + } + // assert values equal + Assert.assertTrue(Bytes.equals(expected.getValueArray(), expected.getValueOffset(), + expected.getValueLength(), actual.getValueArray(), actual.getValueOffset(), + actual.getValueLength())); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java new file mode 100644 index 0000000..8f9fa33 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataComplexQualifiers.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataComplexQualifiers extends BaseTestRowData{ + + static byte[] + Arow = Bytes.toBytes("Arow"), + cf = PrefixTreeTestConstants.TEST_CF, + v0 = Bytes.toBytes("v0"); + + static List qualifiers = Lists.newArrayList(); + static { + List qualifierStrings = Lists.newArrayList(); + qualifierStrings.add("cq"); + qualifierStrings.add("cq0"); + qualifierStrings.add("cq1"); + qualifierStrings.add("cq2"); + qualifierStrings.add("dq0");// second root level fan + qualifierStrings.add("dq1");// nub + qualifierStrings.add("dq111");// leaf on nub + qualifierStrings.add("dq11111a");// leaf on leaf + for (String s : qualifierStrings) { + qualifiers.add(Bytes.toBytes(s)); + } + } + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + for (byte[] qualifier : qualifiers) { + d.add(new KeyValue(Arow, cf, qualifier, ts, v0)); + } + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java new file mode 100644 index 0000000..8cc3169 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDeeper.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +/* + * Goes beyond a trivial trie to add a branch on the "cf" node + */ +public class TestRowDataDeeper extends BaseTestRowData{ + + static byte[] + cdc = Bytes.toBytes("cdc"), + cf6 = Bytes.toBytes("cf6"), + cfc = Bytes.toBytes("cfc"), + f = Bytes.toBytes("f"), + q = Bytes.toBytes("q"), + v = Bytes.toBytes("v"); + + static long + ts = 55L; + + static List d = Lists.newArrayList(); + static{ + d.add(new KeyValue(cdc, f, q, ts, v)); + d.add(new KeyValue(cf6, f, q, ts, v)); + d.add(new KeyValue(cfc, f, q, ts, v)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + //0: token:c; fan:d,f + //1: token:f; fan:6,c + //2: leaves + Assert.assertEquals(3, blockMeta.getRowTreeDepth()); + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + /** + * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only + * rA, but rB as well. + */ + KeyValue cfcRow = KeyValue.createFirstOnRow(Bytes.toBytes("cfc")); + CellScannerPosition position = searcher.positionAtOrAfter(cfcRow); + Assert.assertEquals(CellScannerPosition.AFTER, position); + Assert.assertEquals(d.get(2), searcher.getCurrent()); + searcher.previous(); + Assert.assertEquals(d.get(1), searcher.getCurrent()); + } +} + + diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java new file mode 100644 index 0000000..7dd5eca --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataDifferentTimestamps.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + */ +public class TestRowDataDifferentTimestamps extends BaseTestRowData{ + + static byte[] + Arow = Bytes.toBytes("Arow"), + Brow = Bytes.toBytes("Brow"), + cf = Bytes.toBytes("fammy"), + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1"), + v0 = Bytes.toBytes("v0"); + + static List d = Lists.newArrayList(); + static{ + KeyValue kv0 = new KeyValue(Arow, cf, cq0, 0L, v0); + kv0.setMvccVersion(123456789L); + d.add(kv0); + + KeyValue kv1 = new KeyValue(Arow, cf, cq1, 1L, v0); + kv1.setMvccVersion(3L); + d.add(kv1); + + KeyValue kv2 = new KeyValue(Brow, cf, cq0, 12345678L, v0); + kv2.setMvccVersion(65537L); + d.add(kv2); + + //watch out... Long.MAX_VALUE comes back as 1332221664203, even with other encoders +// d.add(new KeyValue(Brow, cf, cq1, Long.MAX_VALUE, v0)); + KeyValue kv3 = new KeyValue(Brow, cf, cq1, Long.MAX_VALUE-1, v0); + kv3.setMvccVersion(1L); + d.add(kv3); + + KeyValue kv4 = new KeyValue(Brow, cf, cq1, 999999999, v0); + //don't set memstoreTS + d.add(kv4); + + KeyValue kv5 = new KeyValue(Brow, cf, cq1, 12345, v0); + kv5.setMvccVersion(0L); + d.add(kv5); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + Assert.assertTrue(blockMeta.getNumMvccVersionBytes() > 0); + Assert.assertEquals(12, blockMeta.getNumValueBytes()); + + Assert.assertFalse(blockMeta.isAllSameTimestamp()); + Assert.assertNotNull(blockMeta.getMinTimestamp()); + Assert.assertTrue(blockMeta.getTimestampIndexWidth() > 0); + Assert.assertTrue(blockMeta.getTimestampDeltaWidth() > 0); + + Assert.assertFalse(blockMeta.isAllSameMvccVersion()); + Assert.assertNotNull(blockMeta.getMinMvccVersion()); + Assert.assertTrue(blockMeta.getMvccVersionIndexWidth() > 0); + Assert.assertTrue(blockMeta.getMvccVersionDeltaWidth() > 0); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java new file mode 100644 index 0000000..ace18b3 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataEmpty.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataEmpty extends BaseTestRowData{ + + private static byte[] b = new byte[0]; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(b, b, b, 0L, Type.Put, b)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java new file mode 100644 index 0000000..b61f8c2 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataExerciseFInts.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.ArrayList; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + * + * http://pastebin.com/7ks8kzJ2 + * http://pastebin.com/MPn03nsK + */ +public class TestRowDataExerciseFInts extends BaseTestRowData{ + + static List rows; + static{ + List rowStrings = new ArrayList(); + rowStrings.add("com.edsBlog/directoryAa/pageAaa"); + rowStrings.add("com.edsBlog/directoryAa/pageBbb"); + rowStrings.add("com.edsBlog/directoryAa/pageCcc"); + rowStrings.add("com.edsBlog/directoryAa/pageDdd"); + rowStrings.add("com.edsBlog/directoryBb/pageEee"); + rowStrings.add("com.edsBlog/directoryBb/pageFff"); + rowStrings.add("com.edsBlog/directoryBb/pageGgg"); + rowStrings.add("com.edsBlog/directoryBb/pageHhh"); + rowStrings.add("com.isabellasBlog/directoryAa/pageAaa"); + rowStrings.add("com.isabellasBlog/directoryAa/pageBbb"); + rowStrings.add("com.isabellasBlog/directoryAa/pageCcc"); + rowStrings.add("com.isabellasBlog/directoryAa/pageDdd"); + rowStrings.add("com.isabellasBlog/directoryBb/pageEee"); + rowStrings.add("com.isabellasBlog/directoryBb/pageFff"); + rowStrings.add("com.isabellasBlog/directoryBb/pageGgg"); + rowStrings.add("com.isabellasBlog/directoryBb/pageHhh"); + ByteRangeTreeSet ba = new ByteRangeTreeSet(); + 
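+    // note (added for clarity): the ByteRangeTreeSet collects the row keys into sorted,
+    // de-duplicated order, so the KeyValues built from getSortedRanges() further down are
+    // already in the byte order the encoder expects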
for(String row : rowStrings){ + ba.add(new ByteRange(Bytes.toBytes(row))); + } + rows = ba.compile().getSortedRanges(); + } + + static List cols = Lists.newArrayList(); + static{ + cols.add("Chrome"); + cols.add("Chromeb"); + cols.add("Firefox"); + cols.add("InternetExplorer"); + cols.add("Opera"); + cols.add("Safari"); + cols.add("Z1stBrowserWithHuuuuuuuuuuuugeQualifier"); + cols.add("Z2ndBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z3rdBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z4thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z5thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z6thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z7thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z8thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + cols.add("Z9thBrowserWithEvenBiggerQualifierMoreMoreMoreMoreMore"); + } + + static long ts = 1234567890; + + static int MAX_VALUE = 50; + + static List kvs = Lists.newArrayList(); + static { + for (ByteRange row : rows) { + for (String col : cols) { + KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF, + Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE")); + kvs.add(kv); + } + } + } + + @Override + public List getInputs() { + return kvs; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + Assert.assertTrue(blockMeta.getNextNodeOffsetWidth() > 1); + Assert.assertTrue(blockMeta.getQualifierOffsetWidth() > 1); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java new file mode 100644 index 0000000..00c0375 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataMultiFamilies.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataMultiFamilies extends BaseTestRowData{ + + static byte[] + rowA = Bytes.toBytes("rowA"), + rowB = Bytes.toBytes("rowB"), + famA = Bytes.toBytes("famA"), + famB = Bytes.toBytes("famB"), + famBB = Bytes.toBytes("famBB"), + q0 = Bytes.toBytes("q0"), + q1 = Bytes.toBytes("q1"),//start with a different character + vvv = Bytes.toBytes("vvv"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rowA, famA, q0, ts, vvv)); + d.add(new KeyValue(rowA, famB, q1, ts, vvv)); + d.add(new KeyValue(rowA, famBB, q0, ts, vvv)); + d.add(new KeyValue(rowB, famA, q0, ts, vvv)); + d.add(new KeyValue(rowB, famA, q1, ts, vvv)); + d.add(new KeyValue(rowB, famB, q0, ts, vvv)); + d.add(new KeyValue(rowB, famBB, q0, ts, vvv)); + d.add(new KeyValue(rowB, famBB, q1, ts, vvv)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java new file mode 100644 index 0000000..b4250dd --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNub.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataNub extends BaseTestRowData{ + + static byte[] + rowA = Bytes.toBytes("rowA"), + rowB = Bytes.toBytes("rowB"),//nub + rowBB = Bytes.toBytes("rowBB"), + cf = PrefixTreeTestConstants.TEST_CF, + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1"), + v0 = Bytes.toBytes("v0"); + + static long + ts = 55L; + + static List d = Lists.newArrayList(); + static{ + d.add(new KeyValue(rowA, cf, cq0, ts, v0)); + d.add(new KeyValue(rowA, cf, cq1, ts, v0)); + d.add(new KeyValue(rowB, cf, cq0, ts, v0)); + d.add(new KeyValue(rowB, cf, cq1, ts, v0)); + d.add(new KeyValue(rowBB, cf, cq0, ts, v0)); + d.add(new KeyValue(rowBB, cf, cq1, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java new file mode 100644 index 0000000..515fb57 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataNumberStrings.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.Collections; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.Type; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataNumberStrings extends BaseTestRowData{ + + static List d = Lists.newArrayList(); + static { + + /** + * Test a string-encoded list of numbers. 0, 1, 10, 11 will sort as 0, 1, 10, 11 if strings + *

    + * This helped catch a bug with reverse scanning where it was jumping from the last leaf cell to + * the previous nub. It should do 11->10, but it was incorrectly doing 11->1 + */ + List problematicSeries = Lists.newArrayList(0, 1, 10, 11);//sort this at the end + for(Integer i : problematicSeries){ +// for(int i=0; i < 13; ++i){ + byte[] row = Bytes.toBytes(""+i); + byte[] family = Bytes.toBytes("F"); + byte[] column = Bytes.toBytes("C"); + byte[] value = Bytes.toBytes("V"); + + d.add(new KeyValue(row, family, column, 0L, Type.Put, value)); + } + Collections.sort(d, new CellComparator()); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java new file mode 100644 index 0000000..cacca64 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataQualifierByteOrdering.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataQualifierByteOrdering extends BaseTestRowData{ + + static byte[] + Arow = Bytes.toBytes("Arow"), + Brow = Bytes.toBytes("Brow"), + Brow2 = Bytes.toBytes("Brow2"), + fam = Bytes.toBytes("HappyFam"), + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1tail"),//make sure tail does not come back as liat + cq2 = Bytes.toBytes("cq2"), + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(Arow, fam, cq0, ts, v0)); + d.add(new KeyValue(Arow, fam, cq1, ts, v0)); + d.add(new KeyValue(Brow, fam, cq0, ts, v0)); + d.add(new KeyValue(Brow, fam, cq2, ts, v0)); + d.add(new KeyValue(Brow2, fam, cq1, ts, v0)); + d.add(new KeyValue(Brow2, fam, cq2, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java new file mode 100644 index 0000000..0fb7a5b --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValues.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataRandomKeyValues extends BaseTestRowData { + + static List d = Lists.newArrayList(); + static RedundantKVGenerator generator = new RedundantKVGenerator(); + static { + d = generator.generateTestKeyValues(1 << 10); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java new file mode 100644 index 0000000..d764330 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSearcherRowMiss.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +public class TestRowDataSearcherRowMiss extends BaseTestRowData{ + + static byte[] + //don't let the rows share any common prefix bytes + A = Bytes.toBytes("A"), + AA = Bytes.toBytes("AA"), + AAA = Bytes.toBytes("AAA"), + B = Bytes.toBytes("B"), + cf = Bytes.toBytes("fam"), + cq = Bytes.toBytes("cq0"), + v = Bytes.toBytes("v0"); + + static long + ts = 55L; + + static List d = Lists.newArrayList(); + static{ + d.add(new KeyValue(A, cf, cq, ts, v)); + d.add(new KeyValue(AA, cf, cq, ts, v)); + d.add(new KeyValue(AAA, cf, cq, ts, v)); + d.add(new KeyValue(B, cf, cq, ts, v)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + assertRowOffsetsCorrect(); + + searcher.resetToBeforeFirstEntry(); + + //test first cell + searcher.next(); + Cell first = searcher.getCurrent(); + Assert.assertTrue(CellComparator.equals(d.get(0), first)); + + //test first cell in second row + Assert.assertTrue(searcher.positionAt(d.get(1))); + Assert.assertTrue(CellComparator.equals(d.get(1), searcher.getCurrent())); + + testBetween1and2(searcher); + testBetween2and3(searcher); + } + + /************ private methods, call from above *******************/ + + private void assertRowOffsetsCorrect(){ + Assert.assertEquals(4, getRowStartIndexes().size()); + } + + private void testBetween1and2(CellSearcher searcher){ + CellScannerPosition p;//reuse + Cell betweenAAndAAA = new KeyValue(AA, cf, cq, ts-2, v); + + //test exact + Assert.assertFalse(searcher.positionAt(betweenAAndAAA)); + + //test atOrBefore + p = searcher.positionAtOrBefore(betweenAAndAAA); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(1))); + + //test atOrAfter + p = searcher.positionAtOrAfter(betweenAAndAAA); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(2))); + } + + private void testBetween2and3(CellSearcher searcher){ + CellScannerPosition p;//reuse + Cell betweenAAAndB = new KeyValue(AAA, cf, cq, ts-2, v); + + //test exact + Assert.assertFalse(searcher.positionAt(betweenAAAndB)); + + //test atOrBefore + p = searcher.positionAtOrBefore(betweenAAAndB); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(2))); + + //test atOrAfter + p = searcher.positionAtOrAfter(betweenAAAndB); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(3))); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java new file mode 100644 index 0000000..2a93763 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSimple.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CollectionUtils; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellComparator; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +public class TestRowDataSimple extends BaseTestRowData { + + static byte[] + // don't let the rows share any common prefix bytes + rowA = Bytes.toBytes("Arow"), + rowB = Bytes.toBytes("Brow"), cf = Bytes.toBytes("fam"), + cq0 = Bytes.toBytes("cq0"), + cq1 = Bytes.toBytes("cq1tail"),// make sure tail does not come back as liat + cq2 = Bytes.toBytes("dcq2"),// start with a different character + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rowA, cf, cq0, ts, v0)); + d.add(new KeyValue(rowA, cf, cq1, ts, v0)); + d.add(new KeyValue(rowA, cf, cq2, ts, v0)); + d.add(new KeyValue(rowB, cf, cq0, ts, v0)); + d.add(new KeyValue(rowB, cf, cq1, ts, v0)); + d.add(new KeyValue(rowB, cf, cq2, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + CellScannerPosition p;// reuse + searcher.resetToBeforeFirstEntry(); + + // test first cell + searcher.next(); + Cell first = searcher.getCurrent(); + Assert.assertTrue(CellComparator.equals(d.get(0), first)); + + // test first cell in second row + Assert.assertTrue(searcher.positionAt(d.get(3))); + Assert.assertTrue(CellComparator.equals(d.get(3), searcher.getCurrent())); + + Cell between4And5 = new KeyValue(rowB, cf, cq1, ts - 2, v0); + + // test exact + Assert.assertFalse(searcher.positionAt(between4And5)); + + // test atOrBefore + p = searcher.positionAtOrBefore(between4And5); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(4))); + + // test atOrAfter + p = searcher.positionAtOrAfter(between4And5); + Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(5))); + + // test when key falls before first key in block + Cell beforeFirst = new KeyValue(Bytes.toBytes("A"), cf, cq0, ts, v0); + Assert.assertFalse(searcher.positionAt(beforeFirst)); + p = searcher.positionAtOrBefore(beforeFirst); + Assert.assertEquals(CellScannerPosition.BEFORE_FIRST, p); + p = searcher.positionAtOrAfter(beforeFirst); + 
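+    // note (added for clarity): beforeFirst sorts ahead of every cell in the block, so the exact
+    // positionAt() above fails and positionAtOrBefore() reports BEFORE_FIRST; positionAtOrAfter()
+    // is expected to land on the first cell of the block and report AFTER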
Assert.assertEquals(CellScannerPosition.AFTER, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), d.get(0))); + Assert.assertEquals(d.get(0), searcher.getCurrent()); + + // test when key falls after last key in block + Cell afterLast = new KeyValue(Bytes.toBytes("z"), cf, cq0, ts, v0);// must be lower case z + Assert.assertFalse(searcher.positionAt(afterLast)); + p = searcher.positionAtOrAfter(afterLast); + Assert.assertEquals(CellScannerPosition.AFTER_LAST, p); + p = searcher.positionAtOrBefore(afterLast); + Assert.assertEquals(CellScannerPosition.BEFORE, p); + Assert.assertTrue(CellComparator.equals(searcher.getCurrent(), CollectionUtils.getLast(d))); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java new file mode 100644 index 0000000..21f6083 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataSingleQualifier.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +public class TestRowDataSingleQualifier extends BaseTestRowData{ + + static byte[] + rowA = Bytes.toBytes("rowA"), + rowB = Bytes.toBytes("rowB"), + cf = PrefixTreeTestConstants.TEST_CF, + cq0 = Bytes.toBytes("cq0"), + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rowA, cf, cq0, ts, v0)); + d.add(new KeyValue(rowB, cf, cq0, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java new file mode 100644 index 0000000..a5dda1d --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataTrivial.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.cell.CellScannerPosition; +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hbase.codec.prefixtree.scanner.CellSearcher; + +import com.google.common.collect.Lists; + +public class TestRowDataTrivial extends BaseTestRowData{ + + static byte[] + rA = Bytes.toBytes("rA"), + rB = Bytes.toBytes("rB"),//turn "r" into a branch for the Searcher tests + cf = Bytes.toBytes("fam"), + cq0 = Bytes.toBytes("q0"), + v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + d.add(new KeyValue(rA, cf, cq0, ts, v0)); + d.add(new KeyValue(rB, cf, cq0, ts, v0)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + // node[0] -> root[r] + // node[1] -> leaf[A], etc + Assert.assertEquals(2, blockMeta.getRowTreeDepth()); + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + /** + * The searcher should get a token mismatch on the "r" branch. Assert that it skips not only rA, + * but rB as well. + */ + KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz")); + CellScannerPosition position = searcher.positionAtOrAfter(afterLast); + Assert.assertEquals(CellScannerPosition.AFTER_LAST, position); + Assert.assertNull(searcher.getCurrent()); + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java new file mode 100644 index 0000000..4d9b7a3 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrls.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.PrefixTreeTestConstants; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hbase.util.byterange.impl.ByteRangeTreeSet; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + * + * http://pastebin.com/7ks8kzJ2 + * http://pastebin.com/MPn03nsK + */ +public class TestRowDataUrls extends BaseTestRowData{ + + static List rows; + static{ + List rowStrings = new ArrayList(); + rowStrings.add("com.edsBlog/directoryAa/pageAaa"); + rowStrings.add("com.edsBlog/directoryAa/pageBbb"); + rowStrings.add("com.edsBlog/directoryAa/pageCcc"); + rowStrings.add("com.edsBlog/directoryAa/pageDdd"); + rowStrings.add("com.edsBlog/directoryBb/pageEee"); + rowStrings.add("com.edsBlog/directoryBb/pageFff"); + rowStrings.add("com.edsBlog/directoryBb/pageGgg"); + rowStrings.add("com.edsBlog/directoryBb/pageHhh"); + rowStrings.add("com.isabellasBlog/directoryAa/pageAaa"); + rowStrings.add("com.isabellasBlog/directoryAa/pageBbb"); + rowStrings.add("com.isabellasBlog/directoryAa/pageCcc"); + rowStrings.add("com.isabellasBlog/directoryAa/pageDdd"); + rowStrings.add("com.isabellasBlog/directoryBb/pageEee"); + rowStrings.add("com.isabellasBlog/directoryBb/pageFff"); + rowStrings.add("com.isabellasBlog/directoryBb/pageGgg"); + rowStrings.add("com.isabellasBlog/directoryBb/pageHhh"); + ByteRangeTreeSet ba = new ByteRangeTreeSet(); + for (String row : rowStrings) { + ba.add(new ByteRange(Bytes.toBytes(row))); + } + rows = ba.compile().getSortedRanges(); + } + + static List cols = Lists.newArrayList(); + static { + cols.add("Chrome"); + cols.add("Chromeb"); + cols.add("Firefox"); + cols.add("InternetExplorer"); + cols.add("Opera"); + cols.add("Safari"); + } + + static long ts = 1234567890; + + static int MAX_VALUE = 50; + + static List kvs = Lists.newArrayList(); + static { + for (ByteRange row : rows) { + for (String col : cols) { + KeyValue kv = new KeyValue(row.deepCopyToNewArray(), PrefixTreeTestConstants.TEST_CF, + Bytes.toBytes(col), ts, KeyValue.Type.Put, Bytes.toBytes("VALUE")); + kvs.add(kv); + // System.out.println("TestRows5:"+kv); + } + } + } + + @Override + public List getInputs() { + return kvs; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java new file mode 100644 index 0000000..d03bac9 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/row/data/TestRowDataUrlsExample.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.row.data; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.codec.prefixtree.encode.PrefixTreeEncoder; +import org.apache.hbase.codec.prefixtree.encode.column.ColumnNodeWriter; +import org.apache.hbase.codec.prefixtree.encode.row.RowNodeWriter; +import org.apache.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; +import org.apache.hbase.codec.prefixtree.row.BaseTestRowData; + +import com.google.common.collect.Lists; + +/* + * test different timestamps + * + * http://pastebin.com/7ks8kzJ2 + * http://pastebin.com/MPn03nsK + */ +public class TestRowDataUrlsExample extends BaseTestRowData{ + + static String TENANT_ID = Integer.toString(95322); + static String APP_ID = Integer.toString(12); + static List URLS = Lists.newArrayList( + "com.dablog/2011/10/04/boating", + "com.dablog/2011/10/09/lasers", + "com.jamiesrecipes", //this nub helped find a bug + "com.jamiesrecipes/eggs"); + static String FAMILY = "hits"; + static List BROWSERS = Lists.newArrayList( + "Chrome", "IE8", "IE9beta");//, "Opera", "Safari"); + static long TIMESTAMP = 1234567890; + + static int MAX_VALUE = 50; + + static List kvs = Lists.newArrayList(); + static{ + for(String rowKey : URLS){ + for(String qualifier : BROWSERS){ + KeyValue kv = new KeyValue( + Bytes.toBytes(rowKey), + Bytes.toBytes(FAMILY), + Bytes.toBytes(qualifier), + TIMESTAMP, + KeyValue.Type.Put, + Bytes.toBytes("VvvV")); + kvs.add(kv); + } + } + } + + /** + * Used for generating docs. + */ + public static void main(String... 
args) throws IOException{ + System.out.println("-- inputs --"); + System.out.println(KeyValueTestUtil.toStringWithPadding(kvs, true)); + ByteArrayOutputStream os = new ByteArrayOutputStream(1<<20); + PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false); + + for(KeyValue kv : kvs){ + encoder.write(kv); + } + encoder.flush(); + + System.out.println("-- qualifier SortedPtBuilderNodes --"); + for(TokenizerNode tokenizer : encoder.getQualifierWriter().getNonLeaves()){ + System.out.println(tokenizer); + } + for(TokenizerNode tokenizerNode : encoder.getQualifierWriter().getLeaves()){ + System.out.println(tokenizerNode); + } + + System.out.println("-- qualifier PtColumnNodeWriters --"); + for(ColumnNodeWriter writer : encoder.getQualifierWriter().getColumnNodeWriters()){ + System.out.println(writer); + } + + System.out.println("-- rowKey SortedPtBuilderNodes --"); + for(TokenizerNode tokenizerNode : encoder.getRowWriter().getNonLeaves()){ + System.out.println(tokenizerNode); + } + for(TokenizerNode tokenizerNode : encoder.getRowWriter().getLeaves()){ + System.out.println(tokenizerNode); + } + + System.out.println("-- row PtRowNodeWriters --"); + for(RowNodeWriter writer : encoder.getRowWriter().getNonLeafWriters()){ + System.out.println(writer); + } + for(RowNodeWriter writer : encoder.getRowWriter().getLeafWriters()){ + System.out.println(writer); + } + + System.out.println("-- concatenated values --"); + System.out.println(Bytes.toStringBinary(encoder.getValueByteRange().deepCopyToNewArray())); + } + + @Override + public List getInputs() { + return kvs; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java new file mode 100644 index 0000000..6db1a80 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampData.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.timestamp; + +import java.util.Collection; +import java.util.List; + +import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataBasic; +import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataNumbers; +import org.apache.hbase.codec.prefixtree.timestamp.data.TestTimestampDataRepeats; + +import com.google.common.collect.Lists; + +public interface TestTimestampData { + + List getInputs(); + long getMinimum(); + List getOutputs(); + + public static class InMemory { + public Collection getAllAsObjectArray() { + List all = Lists.newArrayList(); + all.add(new Object[] { new TestTimestampDataBasic() }); + all.add(new Object[] { new TestTimestampDataNumbers() }); + all.add(new Object[] { new TestTimestampDataRepeats() }); + return all; + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java new file mode 100644 index 0000000..d929217 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/TestTimestampEncoder.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.codec.prefixtree.timestamp; + +import java.io.IOException; +import java.util.Collection; + +import junit.framework.Assert; + +import org.apache.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; +import org.apache.hbase.codec.prefixtree.encode.other.LongEncoder; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestTimestampEncoder { + + @Parameters + public static Collection<Object[]> parameters() { + return new TestTimestampData.InMemory().getAllAsObjectArray(); + } + + private TestTimestampData timestamps; + private PrefixTreeBlockMeta blockMeta; + private LongEncoder encoder; + private byte[] bytes; + private TimestampDecoder decoder; + + public TestTimestampEncoder(TestTimestampData testTimestamps) throws IOException { + this.timestamps = testTimestamps; + this.blockMeta = new PrefixTreeBlockMeta(); + this.blockMeta.setNumMetaBytes(0); + this.blockMeta.setNumRowBytes(0); + this.blockMeta.setNumQualifierBytes(0); + this.encoder = new LongEncoder(blockMeta); + for (Long ts : testTimestamps.getInputs()) { + encoder.add(ts); + } + encoder.compile(); + blockMeta.setTimestampFields(encoder); + bytes = encoder.getByteArray(); + decoder = new TimestampDecoder(); + decoder.initOnBlock(blockMeta, bytes); + } + + @Test + public void testCompressorMinimum() { + Assert.assertEquals(timestamps.getMinimum(), encoder.getMin()); + } + + @Test + public void testCompressorRoundTrip() { + long[] outputs = encoder.getSortedUniqueTimestamps(); + for (int i = 0; i < timestamps.getOutputs().size(); ++i) { + long input = timestamps.getOutputs().get(i); + long output = outputs[i]; + Assert.assertEquals(input, output); + } + } + + @Test + public void testReaderMinimum() { + Assert.assertEquals(timestamps.getMinimum(), decoder.getLong(0)); + } + + @Test + public void testReaderRoundTrip() { + for (int i = 0; i < timestamps.getOutputs().size(); ++i) { + long input = timestamps.getOutputs().get(i); + long output = decoder.getLong(i); + Assert.assertEquals(input, output); + } + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java new file mode 100644 index 0000000..5c0da94 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataBasic.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hbase.codec.prefixtree.timestamp.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData; + +public class TestTimestampDataBasic implements TestTimestampData { + + @Override + public List<Long> getInputs() { + List<Long> d = new ArrayList<Long>(); + d.add(5L); + d.add(3L); + d.add(0L); + d.add(1L); + d.add(3L); + return d; + } + + @Override + public long getMinimum() { + return 0L; + } + + @Override + public List<Long> getOutputs() { + List<Long> d = new ArrayList<Long>(); + d.add(0L); + d.add(1L); + d.add(3L); + d.add(5L); + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java new file mode 100644 index 0000000..4c1cbd5 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataNumbers.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.timestamp.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData; + +public class TestTimestampDataNumbers implements TestTimestampData { + + private int shift = 8; + + @Override + public List<Long> getInputs() { + List<Long> d = new ArrayList<Long>(); + d.add(5L << shift); + d.add(3L << shift); + d.add(7L << shift); + d.add(1L << shift); + d.add(3L << shift); + return d; + } + + @Override + public long getMinimum() { + return 1L << shift; + } + + @Override + public List<Long> getOutputs() { + List<Long> d = new ArrayList<Long>(); + d.add(1L << shift); + d.add(3L << shift); + d.add(5L << shift); + d.add(7L << shift); + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java new file mode 100644 index 0000000..985d584 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/codec/prefixtree/timestamp/data/TestTimestampDataRepeats.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.codec.prefixtree.timestamp.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hbase.codec.prefixtree.timestamp.TestTimestampData; + +public class TestTimestampDataRepeats implements TestTimestampData { + + private static long t = 1234567890L; + + @Override + public List<Long> getInputs() { + List<Long> d = new ArrayList<Long>(); + d.add(t); + d.add(t); + d.add(t); + d.add(t); + d.add(t); + return d; + } + + @Override + public long getMinimum() { + return t; + } + + @Override + public List<Long> getOutputs() { + List<Long> d = new ArrayList<Long>(); + return d; + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java new file mode 100644 index 0000000..41bbb6a --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/bytes/TestByteRange.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.bytes; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.util.ByteRange; +import org.junit.Test; + +public class TestByteRange { + + @Test + public void testConstructor() { + ByteRange b = new ByteRange(new byte[] { 0, 1, 2 }); + Assert.assertEquals(3, b.getLength()); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java new file mode 100644 index 0000000..8cf7bd9 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/comparator/ByteArrayComparator.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.comparator; + +import java.util.Comparator; + +import org.apache.hadoop.hbase.util.Bytes; + +public class ByteArrayComparator implements Comparator<byte[]> { + + @Override + public int compare(byte[] a, byte[] b) { + return Bytes.compareTo(a, b); + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java new file mode 100644 index 0000000..05f9c02 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/NumberFormatter.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.number; + +import java.text.DecimalFormat; + +public class NumberFormatter { + + public static String addCommas(final Number pValue) { + if (pValue == null) { + return null; + } + String format = "###,###,###,###,###,###,###,###.#####################"; + return new DecimalFormat(format).format(pValue);// biggest is 19 digits + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java new file mode 100644 index 0000000..57fd8f5 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/number/RandomNumberUtils.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hbase.util.number; + +import java.util.Random; + +public class RandomNumberUtils { + + public static long nextPositiveLong(Random random) { + while (true) { + long value = random.nextLong(); + if (value > 0) { + return value; + } + } + } + +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java new file mode 100644 index 0000000..579af34 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestFIntTool.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.vint; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.junit.Assert; +import org.junit.Test; + +/********************** tests *************************/ + +public class TestFIntTool { + @Test + public void testLeadingZeros() { + Assert.assertEquals(64, Long.numberOfLeadingZeros(0)); + Assert.assertEquals(63, Long.numberOfLeadingZeros(1)); + Assert.assertEquals(0, Long.numberOfLeadingZeros(Long.MIN_VALUE)); + Assert.assertEquals(0, Long.numberOfLeadingZeros(-1)); + Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE)); + Assert.assertEquals(1, Long.numberOfLeadingZeros(Long.MAX_VALUE - 1)); + } + + @Test + public void testMaxValueForNumBytes() { + Assert.assertEquals(255, UFIntTool.maxValueForNumBytes(1)); + Assert.assertEquals(65535, UFIntTool.maxValueForNumBytes(2)); + Assert.assertEquals(0xffffff, UFIntTool.maxValueForNumBytes(3)); + Assert.assertEquals(0xffffffffffffffL, UFIntTool.maxValueForNumBytes(7)); + } + + @Test + public void testNumBytes() { + Assert.assertEquals(1, UFIntTool.numBytes(0)); + Assert.assertEquals(1, UFIntTool.numBytes(1)); + Assert.assertEquals(1, UFIntTool.numBytes(255)); + Assert.assertEquals(2, UFIntTool.numBytes(256)); + Assert.assertEquals(2, UFIntTool.numBytes(65535)); + Assert.assertEquals(3, UFIntTool.numBytes(65536)); + Assert.assertEquals(4, UFIntTool.numBytes(0xffffffffL)); + Assert.assertEquals(5, UFIntTool.numBytes(0x100000000L)); + Assert.assertEquals(4, UFIntTool.numBytes(Integer.MAX_VALUE)); + Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE)); + Assert.assertEquals(8, UFIntTool.numBytes(Long.MAX_VALUE - 1)); + } + + @Test + public void testGetBytes() { + Assert.assertArrayEquals(new byte[] { 0 }, UFIntTool.getBytes(1, 0)); + Assert.assertArrayEquals(new byte[] { 1 }, UFIntTool.getBytes(1, 1)); + Assert.assertArrayEquals(new byte[] { -1 }, UFIntTool.getBytes(1, 255)); + Assert.assertArrayEquals(new byte[] { 1, 0 }, UFIntTool.getBytes(2, 256)); + Assert.assertArrayEquals(new byte[] { 1, 3 }, UFIntTool.getBytes(2, 256 + 3)); + Assert.assertArrayEquals(new byte[] { 1, -128 }, 
UFIntTool.getBytes(2, 256 + 128)); + Assert.assertArrayEquals(new byte[] { 1, -1 }, UFIntTool.getBytes(2, 256 + 255)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 }, + UFIntTool.getBytes(4, Integer.MAX_VALUE)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, + UFIntTool.getBytes(8, Long.MAX_VALUE)); + } + + @Test + public void testFromBytes() { + Assert.assertEquals(0, UFIntTool.fromBytes(new byte[] { 0 })); + Assert.assertEquals(1, UFIntTool.fromBytes(new byte[] { 1 })); + Assert.assertEquals(255, UFIntTool.fromBytes(new byte[] { -1 })); + Assert.assertEquals(256, UFIntTool.fromBytes(new byte[] { 1, 0 })); + Assert.assertEquals(256 + 3, UFIntTool.fromBytes(new byte[] { 1, 3 })); + Assert.assertEquals(256 + 128, UFIntTool.fromBytes(new byte[] { 1, -128 })); + Assert.assertEquals(256 + 255, UFIntTool.fromBytes(new byte[] { 1, -1 })); + Assert.assertEquals(Integer.MAX_VALUE, UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1 })); + Assert.assertEquals(Long.MAX_VALUE, + UFIntTool.fromBytes(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 })); + } + + @Test + public void testRoundTrips() { + long[] values = new long[] { 0, 1, 2, 255, 256, 31123, 65535, 65536, 65537, 0xfffffeL, + 0xffffffL, 0x1000000L, 0x1000001L, Integer.MAX_VALUE - 1, Integer.MAX_VALUE, + (long) Integer.MAX_VALUE + 1, Long.MAX_VALUE - 1, Long.MAX_VALUE }; + for (int i = 0; i < values.length; ++i) { + Assert.assertEquals(values[i], UFIntTool.fromBytes(UFIntTool.getBytes(8, values[i]))); + } + } + + @Test + public void testWriteBytes() throws IOException {// copied from testGetBytes + Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(1, 0)); + Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1, 1)); + Assert.assertArrayEquals(new byte[] { -1 }, bytesViaOutputStream(1, 255)); + Assert.assertArrayEquals(new byte[] { 1, 0 }, bytesViaOutputStream(2, 256)); + Assert.assertArrayEquals(new byte[] { 1, 3 }, bytesViaOutputStream(2, 256 + 3)); + Assert.assertArrayEquals(new byte[] { 1, -128 }, bytesViaOutputStream(2, 256 + 128)); + Assert.assertArrayEquals(new byte[] { 1, -1 }, bytesViaOutputStream(2, 256 + 255)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1 }, + bytesViaOutputStream(4, Integer.MAX_VALUE)); + Assert.assertArrayEquals(new byte[] { 127, -1, -1, -1, -1, -1, -1, -1 }, + bytesViaOutputStream(8, Long.MAX_VALUE)); + } + + private byte[] bytesViaOutputStream(int outputWidth, long value) throws IOException { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + UFIntTool.writeBytes(outputWidth, value, os); + return os.toByteArray(); + } +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java new file mode 100644 index 0000000..1fc4064 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVIntTool.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.vint; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +public class TestVIntTool { + + @Test + public void testNumBytes() { + Assert.assertEquals(1, UVIntTool.numBytes(0)); + Assert.assertEquals(1, UVIntTool.numBytes(1)); + Assert.assertEquals(1, UVIntTool.numBytes(100)); + Assert.assertEquals(1, UVIntTool.numBytes(126)); + Assert.assertEquals(1, UVIntTool.numBytes(127)); + Assert.assertEquals(2, UVIntTool.numBytes(128)); + Assert.assertEquals(2, UVIntTool.numBytes(129)); + Assert.assertEquals(5, UVIntTool.numBytes(Integer.MAX_VALUE)); + } + + @Test + public void testWriteBytes() throws IOException { + Assert.assertArrayEquals(new byte[] { 0 }, bytesViaOutputStream(0)); + Assert.assertArrayEquals(new byte[] { 1 }, bytesViaOutputStream(1)); + Assert.assertArrayEquals(new byte[] { 63 }, bytesViaOutputStream(63)); + Assert.assertArrayEquals(new byte[] { 127 }, bytesViaOutputStream(127)); + Assert.assertArrayEquals(new byte[] { -128, 1 }, bytesViaOutputStream(128)); + Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, bytesViaOutputStream(155)); + Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, bytesViaOutputStream(Integer.MAX_VALUE)); + } + + private byte[] bytesViaOutputStream(int value) throws IOException { + ByteArrayOutputStream os = new ByteArrayOutputStream(); + UVIntTool.writeBytes(value, os); + return os.toByteArray(); + } + + @Test + public void testToBytes() { + Assert.assertArrayEquals(new byte[] { 0 }, UVIntTool.getBytes(0)); + Assert.assertArrayEquals(new byte[] { 1 }, UVIntTool.getBytes(1)); + Assert.assertArrayEquals(new byte[] { 63 }, UVIntTool.getBytes(63)); + Assert.assertArrayEquals(new byte[] { 127 }, UVIntTool.getBytes(127)); + Assert.assertArrayEquals(new byte[] { -128, 1 }, UVIntTool.getBytes(128)); + Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVIntTool.getBytes(155)); + Assert.assertArrayEquals(UVIntTool.MAX_VALUE_BYTES, UVIntTool.getBytes(Integer.MAX_VALUE)); + } + + @Test + public void testFromBytes() { + Assert.assertEquals(Integer.MAX_VALUE, UVIntTool.getInt(UVIntTool.MAX_VALUE_BYTES)); + } + + @Test + public void testRoundTrips() { + Random random = new Random(); + for (int i = 0; i < 10000; ++i) { + int value = random.nextInt(Integer.MAX_VALUE); + byte[] bytes = UVIntTool.getBytes(value); + int roundTripped = UVIntTool.getInt(bytes); + Assert.assertEquals(value, roundTripped); + } + } + + @Test + public void testInputStreams() throws IOException { + ByteArrayInputStream is; + is = new ByteArrayInputStream(new byte[] { 0 }); + Assert.assertEquals(0, UVIntTool.getInt(is)); + is = new ByteArrayInputStream(new byte[] { 5 }); + Assert.assertEquals(5, UVIntTool.getInt(is)); + is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 }); + Assert.assertEquals(155, UVIntTool.getInt(is)); + } + +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java 
b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java new file mode 100644 index 0000000..e294abd --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hbase/util/vint/TestVLongTool.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.util.vint; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Random; + +import org.apache.hbase.util.number.RandomNumberUtils; +import org.junit.Assert; +import org.junit.Test; + +public class TestVLongTool { + + @Test + public void testNumBytes() { + Assert.assertEquals(1, UVLongTool.numBytes(0)); + Assert.assertEquals(1, UVLongTool.numBytes(1)); + Assert.assertEquals(1, UVLongTool.numBytes(100)); + Assert.assertEquals(1, UVLongTool.numBytes(126)); + Assert.assertEquals(1, UVLongTool.numBytes(127)); + Assert.assertEquals(2, UVLongTool.numBytes(128)); + Assert.assertEquals(2, UVLongTool.numBytes(129)); + Assert.assertEquals(9, UVLongTool.numBytes(Long.MAX_VALUE)); + } + + @Test + public void testToBytes() { + Assert.assertArrayEquals(new byte[] { 0 }, UVLongTool.getBytes(0)); + Assert.assertArrayEquals(new byte[] { 1 }, UVLongTool.getBytes(1)); + Assert.assertArrayEquals(new byte[] { 63 }, UVLongTool.getBytes(63)); + Assert.assertArrayEquals(new byte[] { 127 }, UVLongTool.getBytes(127)); + Assert.assertArrayEquals(new byte[] { -128, 1 }, UVLongTool.getBytes(128)); + Assert.assertArrayEquals(new byte[] { -128 + 27, 1 }, UVLongTool.getBytes(155)); + Assert.assertArrayEquals(UVLongTool.MAX_VALUE_BYTES, UVLongTool.getBytes(Long.MAX_VALUE)); + } + + @Test + public void testFromBytes() { + Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES)); + } + + @Test + public void testFromBytesOffset() { + Assert.assertEquals(Long.MAX_VALUE, UVLongTool.getLong(UVLongTool.MAX_VALUE_BYTES, 0)); + + long ms = 1318966363481L; +// System.out.println(ms); + byte[] bytes = UVLongTool.getBytes(ms); +// System.out.println(Arrays.toString(bytes)); + long roundTripped = UVLongTool.getLong(bytes, 0); + Assert.assertEquals(ms, roundTripped); + + int calculatedNumBytes = UVLongTool.numBytes(ms); + int actualNumBytes = bytes.length; + Assert.assertEquals(actualNumBytes, calculatedNumBytes); + + byte[] shiftedBytes = new byte[1000]; + int shift = 33; + System.arraycopy(bytes, 0, shiftedBytes, shift, bytes.length); + long shiftedRoundTrip = UVLongTool.getLong(shiftedBytes, shift); + Assert.assertEquals(ms, shiftedRoundTrip); + } + + @Test + public void testRoundTrips() { + Random random = new Random(); + for (int i = 0; i < 10000; ++i) { + long value = RandomNumberUtils.nextPositiveLong(random); + byte[] bytes = UVLongTool.getBytes(value); + long roundTripped = UVLongTool.getLong(bytes); + 
Assert.assertEquals(value, roundTripped); + int calculatedNumBytes = UVLongTool.numBytes(value); + int actualNumBytes = bytes.length; + Assert.assertEquals(actualNumBytes, calculatedNumBytes); + } + } + + @Test + public void testInputStreams() throws IOException { + ByteArrayInputStream is; + is = new ByteArrayInputStream(new byte[] { 0 }); + Assert.assertEquals(0, UVLongTool.getLong(is)); + is = new ByteArrayInputStream(new byte[] { 5 }); + Assert.assertEquals(5, UVLongTool.getLong(is)); + is = new ByteArrayInputStream(new byte[] { -128 + 27, 1 }); + Assert.assertEquals(155, UVLongTool.getLong(is)); + } +} \ No newline at end of file diff --git a/hbase-server/pom.xml b/hbase-server/pom.xml index 53c643b..07d086b 100644 --- a/hbase-server/pom.xml +++ b/hbase-server/pom.xml @@ -280,6 +280,12 @@ org.apache.hbase + hbase-prefix-tree + + runtime + + + org.apache.hbase hbase-common test-jar diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java index faf7b9b..5588259 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java @@ -112,9 +112,10 @@ import org.hbase.async.Scanner; public class PerformanceEvaluation extends Configured implements Tool { protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName()); - private static final int ROW_LENGTH = 1000; + private static final int DEFAULT_ROW_PREFIX_LENGTH = 16; + private static final int VALUE_LENGTH = 1000; private static final int ONE_GB = 1024 * 1024 * 1000; - private static final int ROWS_PER_GB = ONE_GB / ROW_LENGTH; + private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH; public static final byte[] COMPRESSION = Bytes.toBytes("NONE"); public static final byte[] TABLE_NAME = Bytes.toBytes("TestTable"); @@ -127,6 +128,7 @@ public class PerformanceEvaluation extends Configured implements Tool { private boolean miniCluster = false; private boolean nomapred = false; + private int rowPrefixLength = DEFAULT_ROW_PREFIX_LENGTH; private int N = 1; private int R = ROWS_PER_GB; private byte[] tableName = TABLE_NAME; @@ -537,10 +539,11 @@ public class PerformanceEvaluation extends Configured implements Tool { if (this.presplitRegions == 0) return new byte [0][]; - byte[][] splits = new byte[this.presplitRegions][]; + int numSplitPoints = presplitRegions - 1; + byte[][] splits = new byte[numSplitPoints][]; int jump = this.R / this.presplitRegions; - for (int i=0; i = 0; i--) { b[i] = (byte)((d % 10) + '0'); @@ -1436,10 +1439,10 @@ public class PerformanceEvaluation extends Configured implements Tool { * @return Generated random value to insert into a table cell. 
*/ public static byte[] generateValue(final Random r) { - byte [] b = new byte [ROW_LENGTH]; + byte [] b = new byte [VALUE_LENGTH]; int i = 0; - for(i = 0; i < (ROW_LENGTH-8); i += 8) { + for(i = 0; i < (VALUE_LENGTH-8); i += 8) { b[i] = (byte) (65 + r.nextInt(26)); b[i+1] = b[i]; b[i+2] = b[i]; @@ -1451,7 +1454,7 @@ public class PerformanceEvaluation extends Configured implements Tool { } byte a = (byte) (65 + r.nextInt(26)); - for(; i < ROW_LENGTH; i++) { + for(; i < VALUE_LENGTH; i++) { b[i] = a; } return b; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java index 7d81a60..7c22049 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java @@ -105,15 +105,15 @@ public class TestEncodedSeekers { //write the data, but leave some in the memstore doPuts(region); - + //verify correctness when memstore contains data doGets(region); - + //verify correctness again after compacting region.compactStores(); doGets(region); - + Map encodingCounts = cache.getEncodingCountsForTest(); // Ensure that compactions don't pollute the cache with unencoded blocks @@ -124,8 +124,8 @@ public class TestEncodedSeekers { assertEquals(encoding, encodingInCache); assertTrue(encodingCounts.get(encodingInCache) > 0); } - - + + private void doPuts(HRegion region) throws IOException{ LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE); for (int i = 0; i < NUM_ROWS; ++i) { @@ -147,8 +147,8 @@ public class TestEncodedSeekers { } } } - - + + private void doGets(HRegion region) throws IOException{ for (int i = 0; i < NUM_ROWS; ++i) { final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i); diff --git a/pom.xml b/pom.xml index d18815c..995397b 100644 --- a/pom.xml +++ b/pom.xml @@ -42,7 +42,7 @@ 0.95-SNAPSHOT HBase - Apache HBase™ is the &lt;a href="http://hadoop.apache.org"&rt;Hadoop</a&rt; database. Use it when you need + Apache HBase™ is the &lt;a href="http://hadoop.apache.org"&rt;Hadoop</a&rt; database. Use it when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. @@ -58,6 +58,7 @@ hbase-common hbase-it hbase-examples + hbase-prefix-tree scm:svn:http://svn.apache.org/repos/asf/hbase/trunk @@ -979,6 +980,14 @@ test + org.apache.hbase + hbase-prefix-tree + ${project.version} + + runtime + + hbase-examples org.apache.hbase ${project.version}