diff --git hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java index 2e02ad2..18d54f8 100644 --- hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java +++ hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java @@ -93,6 +93,36 @@ public class CellComparator implements Comparator, Serializable{ return Longs.compare(b.getMvccVersion(), a.getMvccVersion()); } + public static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) { + return findCommonPrefix(left.getRowArray(), right.getRowArray(), left.getRowLength() + - rowCommonPrefix, right.getRowLength() - rowCommonPrefix, left.getRowOffset() + + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix); + } + + private static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, + int leftOffset, int rightOffset) { + int length = Math.min(leftLength, rightLength); + int result = 0; + + while (result < length && left[leftOffset + result] == right[rightOffset + result]) { + result++; + } + + return result; + } + + public static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) { + return findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(), left.getFamilyLength() + - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix, left.getFamilyOffset() + + familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix); + } + + public static int findCommonPrefixInQualifierPart(Cell left, Cell right, int qualifierCommonPrefix) { + return findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(), + left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierLength() + - qualifierCommonPrefix, left.getQualifierOffset() + qualifierCommonPrefix, + right.getQualifierOffset() + qualifierCommonPrefix); + } /**************** equals ****************************/ @@ -212,4 +242,48 @@ public class 
CellComparator implements Comparator, Serializable{ return 0 == compareStaticIgnoreMvccVersion(a, b); } + private static int compare(byte[] left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength) { + return Bytes.compareTo(left, leftOffset, leftLength, right, rightOffset, rightLength); + } + + public static int compareRowsWithCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) { + return compare(left.getRowArray(), left.getRowOffset() + rowCommonPrefix, left.getRowLength() + - rowCommonPrefix, right.getRowArray(), right.getRowOffset() + rowCommonPrefix, + right.getRowLength() - rowCommonPrefix); + } + + public static int compareRowsWithCommonFamilyPrefix(Cell left, Cell right, int familyCommonPrefix) { + return compare(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix, + left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(), + right.getFamilyOffset() + familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix); + } + + public static int compareRowsWithQualifierFamilyPrefix(Cell left, Cell right, int qualCommonPrefix) { + return compare(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix, + left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(), + right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength() + - qualCommonPrefix); + } + + public static int compareTimestamps(final Cell left, final Cell right) { + // Compare timestamps + long ltimestamp = left.getTimestamp(); + long rtimestamp = right.getTimestamp(); + return compareTimestamps(ltimestamp, rtimestamp); + } + + private static int compareTimestamps(final long ltimestamp, final long rtimestamp) { + // The below older timestamps sorting ahead of newer timestamps looks + // wrong but it is intentional. This way, newer timestamps are first + // found when we iterate over a memstore and newer versions are the + // first we trip over when reading from a store file. 
+ if (ltimestamp < rtimestamp) { + return 1; + } else if (ltimestamp > rtimestamp) { + return -1; + } + return 0; + } + } diff --git hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index a384773..2b2d8b9 100644 --- hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -1985,72 +1985,27 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return compareWithoutRow(0, left, loffset, llength, right, roffset, rlength, rrowlength); } + public int compareFlatKey(byte[] left, byte[] right) { return compareFlatKey(left, 0, left.length, right, 0, right.length); } + public int compareFlatKey(Cell left, Cell right) { + int compare = compareRows(left, right); + if (compare != 0) { + return compare; + } + return compareWithoutRow(left, right); + } + /** * Compares the Key of a cell -- with fields being more significant in this order: * rowkey, colfam/qual, timestamp, type, mvcc */ + @Override public int compare(final Cell left, final Cell right) { - // compare row - int compare = compareRowKey(left, right); - if (compare != 0) { - return compare; - } - - // compare vs minimum - byte ltype = left.getTypeByte(); - byte rtype = right.getTypeByte(); - // If the column is not specified, the "minimum" key type appears the - // latest in the sorted order, regardless of the timestamp. This is used - // for specifying the last key/value in a given row, because there is no - // "lexicographically last column" (it would be infinitely long). The - // "maximum" key type does not need this behavior. - int lcfqLen = left.getFamilyLength() + left.getQualifierLength() ; - int rcfqLen = right.getFamilyLength() + right.getQualifierLength() ; - if (lcfqLen == 0 && ltype == Type.Minimum.getCode()) { - // left is "bigger", i.e. 
it appears later in the sorted order - return 1; - } - if (rcfqLen == 0 && rtype == Type.Minimum.getCode()) { - return -1; - } - - - // compare col family / col fam + qual - // If left family size is not equal to right family size, we need not - // compare the qualifiers. - compare = Bytes.compareTo( - left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(), - right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength()); - if (compare != 0) { - return compare; - } - - // Compare qualifier - compare = Bytes.compareTo( - left.getQualifierArray(), left.getQualifierOffset(), left.getQualifierLength(), - right.getQualifierArray(), right.getQualifierOffset(), right.getQualifierLength()); - if (compare!= 0) { - return compare; - } - - // compare timestamp - long ltimestamp = left.getTimestamp(); - long rtimestamp = right.getTimestamp(); - compare = compareTimestamps(ltimestamp, rtimestamp); - if (compare != 0) { - return compare; - } - - // Compare types. Let the delete types sort ahead of puts; i.e. types - // of higher numbers sort before those of lesser numbers. Maximum (255) - // appears ahead of everything, and minimum (0) appears after - // everything. - compare = (0xff & rtype) - (0xff & ltype); + int compare = compareFlatKey(left, right); if (compare != 0) { return compare; } @@ -2088,10 +2043,10 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return Long.MAX_VALUE; } - public int compareTimestamps(final KeyValue left, final KeyValue right) { + public int compareTimestamps(final Cell left, final Cell right) { // Compare timestamps - long ltimestamp = left.getTimestamp(left.getKeyLength()); - long rtimestamp = right.getTimestamp(right.getKeyLength()); + long ltimestamp = left.getTimestamp(); + long rtimestamp = right.getTimestamp(); return compareTimestamps(ltimestamp, rtimestamp); } @@ -2100,7 +2055,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable { * @param right * @return Result comparing rows. 
*/ - public int compareRows(final KeyValue left, final KeyValue right) { + public int compareRows(final Cell left, final Cell right) { return compareRows(left.getRowArray(),left.getRowOffset(), left.getRowLength(), right.getRowArray(), right.getRowOffset(), right.getRowLength()); } @@ -2120,19 +2075,24 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); } - int compareColumns(final KeyValue left, final short lrowlength, - final KeyValue right, final short rrowlength) { - int lfoffset = left.getFamilyOffset(lrowlength); - int rfoffset = right.getFamilyOffset(rrowlength); - int lclength = left.getTotalColumnLength(lrowlength,lfoffset); - int rclength = right.getTotalColumnLength(rrowlength, rfoffset); - int lfamilylength = left.getFamilyLength(lfoffset); - int rfamilylength = right.getFamilyLength(rfoffset); - return compareColumns(left.getBuffer(), lfoffset, - lclength, lfamilylength, - right.getBuffer(), rfoffset, rclength, rfamilylength); + int compareColumns(final Cell left, final short lrowlength, final Cell right, + final short rrowlength) { + int lfoffset = left.getFamilyOffset(); + int rfoffset = right.getFamilyOffset(); + int lclength = left.getQualifierLength(); + int rclength = right.getQualifierLength(); + int lfamilylength = left.getFamilyLength(); + int rfamilylength = right.getFamilyLength(); + int diff = compareFamilies(left.getFamilyArray(), lfoffset, lfamilylength, + right.getFamilyArray(), rfoffset, rfamilylength); + if (diff != 0) { + return diff; + } else { + return compareColumns(left.getQualifierArray(), left.getQualifierOffset(), lclength, + right.getQualifierArray(), right.getQualifierOffset(), rclength); + } } - + protected int compareColumns( byte [] left, int loffset, int llength, final int lfamilylength, byte [] right, int roffset, int rlength, final int rfamilylength) { @@ -2297,20 +2257,69 @@ public class KeyValue implements Cell, HeapSize, Cloneable { 
return (0xff & rtype) - (0xff & ltype); } + private int compareWithoutRow(final Cell leftCell, final Cell rightCell) { + if (leftCell.getFamilyLength() + leftCell.getQualifierLength() == 0 && leftCell.getTypeByte() == Type.Minimum.getCode()) { + // left is "bigger", i.e. it appears later in the sorted order + return 1; + } + if (rightCell.getFamilyLength() + rightCell.getQualifierLength() == 0 && rightCell.getTypeByte() == Type.Minimum.getCode()) { + return -1; + } + boolean sameFamilySize = (leftCell.getFamilyLength() == rightCell.getFamilyLength()); + if (!sameFamilySize) { + // comparing column family is enough. + + return Bytes.compareTo(leftCell.getFamilyArray(), leftCell.getFamilyOffset(), leftCell.getFamilyLength(), + rightCell.getFamilyArray(), rightCell.getFamilyOffset(), rightCell.getFamilyLength()); + } + int diff = compareFamilies(leftCell.getFamilyArray(), leftCell.getFamilyOffset(), leftCell.getFamilyLength(), + rightCell.getFamilyArray(), rightCell.getFamilyOffset(), rightCell.getFamilyLength()); + if(diff != 0) { + return diff; + } + diff = compareColumns(leftCell.getQualifierArray(), leftCell.getQualifierOffset(), + leftCell.getQualifierLength(), rightCell.getQualifierArray(), + rightCell.getQualifierOffset(), rightCell.getQualifierLength()); + if(diff != 0) { + return diff; + } + + diff = compareTimestamps(leftCell, rightCell); + if(diff != 0) { + return diff; + } + // Compare types. Let the delete types sort ahead of puts; i.e. types + // of higher numbers sort before those of lesser numbers. Maximum (255) + // appears ahead of everything, and minimum (0) appears after + // everything. 
+ return (0xff & rightCell.getTypeByte()) - (0xff & leftCell.getTypeByte()); + } + + protected int compareFamilies(final byte[] left, final int loffset, final int lfamilylength, + final byte[] right, final int roffset, final int rfamilylength) { + int diff = Bytes.compareTo(left, loffset, lfamilylength, right, roffset, rfamilylength); + return diff; + } + + protected int compareColumns(final byte[] left, final int loffset, final int lquallength, + final byte[] right, final int roffset, final int rquallength) { + int diff = Bytes.compareTo(left, loffset, lquallength, right, roffset, rquallength); + return diff; + } /** * Compares the row and column of two keyvalues for equality * @param left * @param right * @return True if same row and column. */ - public boolean matchingRowColumn(final KeyValue left, - final KeyValue right) { + public boolean matchingRowColumn(final Cell left, + final Cell right) { short lrowlength = left.getRowLength(); short rrowlength = right.getRowLength(); // TsOffset = end of column data. 
just comparing Row+CF length of each - if ((left.getTimestampOffset() - left.getOffset()) != - (right.getTimestampOffset() - right.getOffset())) { + if ((left.getRowLength() + left.getFamilyLength() + left.getQualifierLength()) != (right + .getRowLength() + right.getFamilyLength() + right.getQualifierLength())) { return false; } @@ -2318,15 +2327,21 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return false; } - int lfoffset = left.getFamilyOffset(lrowlength); - int rfoffset = right.getFamilyOffset(rrowlength); - int lclength = left.getTotalColumnLength(lrowlength,lfoffset); - int rclength = right.getTotalColumnLength(rrowlength, rfoffset); - int lfamilylength = left.getFamilyLength(lfoffset); - int rfamilylength = right.getFamilyLength(rfoffset); - int ccRes = compareColumns(left.getBuffer(), lfoffset, lclength, lfamilylength, - right.getBuffer(), rfoffset, rclength, rfamilylength); - return ccRes == 0; + int lfoffset = left.getFamilyOffset(); + int rfoffset = right.getFamilyOffset(); + int lclength = left.getQualifierLength(); + int rclength = right.getQualifierLength(); + int lfamilylength = left.getFamilyLength(); + int rfamilylength = right.getFamilyLength(); + int diff = compareFamilies(left.getFamilyArray(), lfoffset, lfamilylength, + right.getFamilyArray(), rfoffset, rfamilylength); + if (diff != 0) { + return false; + } else { + diff = compareColumns(left.getQualifierArray(), left.getQualifierOffset(), lclength, + right.getQualifierArray(), right.getQualifierOffset(), rclength); + return diff == 0; + } } /** @@ -2335,7 +2350,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable { * @param right * @return True if rows match. 
*/ - public boolean matchingRows(final KeyValue left, final KeyValue right) { + public boolean matchingRows(final Cell left, final Cell right) { short lrowlength = left.getRowLength(); short rrowlength = right.getRowLength(); return matchingRows(left, lrowlength, right, rrowlength); @@ -2348,8 +2363,8 @@ public class KeyValue implements Cell, HeapSize, Cloneable { * @param rrowlength * @return True if rows match. */ - private boolean matchingRows(final KeyValue left, final short lrowlength, - final KeyValue right, final short rrowlength) { + private boolean matchingRows(final Cell left, final short lrowlength, + final Cell right, final short rrowlength) { return lrowlength == rrowlength && matchingRows(left.getRowArray(), left.getRowOffset(), lrowlength, right.getRowArray(), right.getRowOffset(), rrowlength); @@ -2877,14 +2892,14 @@ public class KeyValue implements Cell, HeapSize, Cloneable { /** * Comparator that compares row component only of a KeyValue. */ - public static class RowOnlyComparator implements Comparator { + public static class RowOnlyComparator implements Comparator { final KVComparator comparator; public RowOnlyComparator(final KVComparator c) { this.comparator = c; } - public int compare(KeyValue left, KeyValue right) { + public int compare(Cell left, Cell right) { return comparator.compareRows(left, right); } } @@ -2924,6 +2939,13 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return Bytes.BYTES_RAWCOMPARATOR.compare(left, loffset, llength, right, roffset, rlength); } + public int compareFlatKey(Cell left, Cell right) { + // Only for test case purpose + return Bytes.BYTES_RAWCOMPARATOR.compare(left.getRowArray(), 0, left.getRowArray().length, + right.getRowArray(), 0, right.getRowArray().length); + + } + public byte[] calcIndexKey(byte[] lastKeyOfPreviousBlock, byte[] firstKeyInBlock) { return firstKeyInBlock; } diff --git hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java 
hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java index 513e2e2..062d0fe 100644 --- hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java +++ hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -22,10 +22,13 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.KeyValue.SamePrefixComparator; +import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.io.TagCompressionContext; import org.apache.hadoop.hbase.io.hfile.BlockType; import org.apache.hadoop.hbase.io.hfile.HFileContext; @@ -68,9 +71,15 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { protected int keyLength; protected int valueLength; protected int lastCommonPrefix; + protected int lastCommonRowPrefix; + protected int lastCommonFamilyPrefix; + protected int lastCommonQualPrefix; protected int tagsLength = 0; protected int tagsOffset = -1; - + protected int rowLength; + protected int familyLength; + protected int qualifierLength; + /** We need to store a copy of the key. 
*/ protected byte[] keyBuffer = new byte[INITIAL_KEY_BUFFER_SIZE]; protected byte[] tagsBuffer = new byte[INITIAL_KEY_BUFFER_SIZE]; @@ -182,6 +191,12 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { } @Override + public int compareKey(KVComparator comparator, Cell key) { + return comparator.compareFlatKey(key, + KeyValue.createKeyValueFromKey(current.keyBuffer, 0, current.keyLength)); + } + + @Override public void setCurrentBuffer(ByteBuffer buffer) { if (this.tagCompressionContext != null) { this.tagCompressionContext.clear(); @@ -343,6 +358,109 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { return 1; } + @Override + public int seekToKeyInBlock(Cell key, boolean seekBefore) { + int commonPrefix = 0; + int rowCommonPrefix = 0; + int familyCommonPrefix = 0; + int qualCommonPrefix = 0; + previous.invalidate(); + do { + int comp; + if (samePrefixComparator != null) { + Cell right = KeyValue.createKeyValueFromKey(current.keyBuffer, 0, current.keyLength); + commonPrefix = Math.min(commonPrefix, current.lastCommonPrefix); + rowCommonPrefix += Math.min(rowCommonPrefix, + Math.min(key.getRowLength(), right.getRowLength())); + commonPrefix += CellComparator.findCommonPrefixInRowPart(key, right, rowCommonPrefix); + comp = CellComparator.compareRowsWithCommonRowPrefix(key, right, rowCommonPrefix); + if (comp == 0) { + comp = compareTypeBytes(key, right); + if (comp == 0) { + familyCommonPrefix += Math.min(familyCommonPrefix, + Math.min(key.getFamilyLength(), right.getFamilyLength())); + commonPrefix += CellComparator.findCommonPrefixInFamilyPart(key, right, + familyCommonPrefix); + comp = CellComparator.compareRowsWithCommonFamilyPrefix(key, right, + familyCommonPrefix); + if (comp == 0) { + qualCommonPrefix += Math.min(qualCommonPrefix, + Math.min(key.getQualifierLength(), right.getQualifierLength())); + commonPrefix += CellComparator.findCommonPrefixInQualifierPart(key, right, + qualCommonPrefix); + comp = 
CellComparator.compareRowsWithQualifierFamilyPrefix(key, right, + qualCommonPrefix); + if (comp == 0) { + comp = CellComparator.compareTimestamps(key, right); + if (comp == 0) { + // Compare types. Let the delete types sort ahead of puts; + // i.e. types + // of higher numbers sort before those of lesser numbers. + // Maximum + // (255) + // appears ahead of everything, and minimum (0) appears + // after + // everything. + comp = (0xff & right.getTypeByte()) - (0xff & key.getTypeByte()); + } + } + } + } + } + } else { + Cell right = KeyValue.createKeyValueFromKey(current.keyBuffer, 0, current.keyLength); + comp = comparator.compareFlatKey(key, right); + } + + if (comp == 0) { // exact match + if (seekBefore) { + if (!previous.isValid()) { + // The caller (seekBefore) has to ensure that we are not at the + // first key in the block. + throw new IllegalStateException("Cannot seekBefore if " + + "positioned at the first key in the block: key=" + + Bytes.toStringBinary(key.getRowArray())); + } + moveToPrevious(); + return 1; + } + return 0; + } + + if (comp < 0) { // already too large, check previous + if (previous.isValid()) { + moveToPrevious(); + } else { + return HConstants.INDEX_KEY_MAGIC; // using optimized index key + } + return 1; + } + + // move to next, if more data is available + if (currentBuffer.hasRemaining()) { + previous.copyFromNext(current); + decodeNext(); + } else { + break; + } + } while (true); + + // we hit the end of the block, not an exact match + return 1; + } + + private int compareTypeBytes(Cell key, Cell right) { + if (key.getFamilyLength() + key.getQualifierLength() == 0 && key.getTypeByte() == Type.Minimum.getCode()) { + // left is "bigger", i.e. 
it appears later in the sorted order + return 1; + } + if (right.getFamilyLength() + right.getQualifierLength() == 0 && right.getTypeByte() == Type.Minimum.getCode()) { + return -1; + } + return 0; + } + + private void moveToPrevious() { if (!previous.isValid()) { throw new IllegalStateException( diff --git hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java index 6185ab3..3c06815 100644 --- hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java +++ hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.io.hfile.HFileContext; @@ -174,9 +175,26 @@ public interface DataBlockEncoder { * of an exact match. Does not matter in case of an inexact match. * @return 0 on exact match, 1 on inexact match. */ + @Deprecated int seekToKeyInBlock( byte[] key, int offset, int length, boolean seekBefore ); + /** + * Moves the seeker position within the current block to: + *
<ul>
+ * <li>the last key that is less than or equal to the given key if
+ * seekBefore is false</li>
+ * <li>the last key that is strictly less than the given key if
+ * seekBefore is true. The caller is responsible for loading the
+ * previous block if the requested key turns out to be the first key of the
+ * current block.</li>
+ * </ul>
+ * @param key - Cell to which the seek should happen + * @param seekBefore find the key strictly less than the given key in case + * of an exact match. Does not matter in case of an inexact match. + * @return 0 on exact match, 1 on inexact match. + */ + int seekToKeyInBlock(Cell key, boolean seekBefore); /** * Compare the given key against the current key @@ -187,5 +205,7 @@ public interface DataBlockEncoder { * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater */ public int compareKey(KVComparator comparator, byte[] key, int offset, int length); + + public int compareKey(KVComparator comparator, Cell key); } } diff --git hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 4679194..983ec46 100644 --- hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -43,6 +43,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.WritableComparator; @@ -1633,6 +1635,29 @@ public class Bytes { } return - (low+1); } + + public static int binarySearch(byte[][] arr, Cell leftCell, RawComparator comparator) { + int low = 0; + int high = arr.length - 1; + + while (low <= high) { + int mid = (low+high) >>> 1; + // we have to compare in this order, because the comparator order + // has special logic when the 'left side' is a special key. 
+ Cell rightCell = KeyValue.createKeyValueFromKey(arr[mid]); + int cmp = comparator.compare(leftCell, rightCell); + // key lives above the midpoint + if (cmp > 0) + low = mid + 1; + // key lives below the midpoint + else if (cmp < 0) + high = mid - 1; + // BAM. how often does this really happen? + else + return mid; + } + return - (low+1); + } /** * Bytewise binary increment/deincrement of long contained in byte array diff --git hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java index d673457..8d79ccf 100644 --- hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java +++ hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeSeeker.java @@ -24,8 +24,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.KeyValue.KVComparator; +import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.codec.prefixtree.decode.DecoderFactory; import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; @@ -152,15 +152,13 @@ public class PrefixTreeSeeker implements EncodedSeeker { boolean forceBeforeOnExactMatch) { if (USE_POSITION_BEFORE) { return seekToOrBeforeUsingPositionAtOrBefore(keyOnlyBytes, offset, length, - forceBeforeOnExactMatch); - }else{ + forceBeforeOnExactMatch); + } else { return seekToOrBeforeUsingPositionAtOrAfter(keyOnlyBytes, offset, length, - forceBeforeOnExactMatch); + forceBeforeOnExactMatch); } } - - /* * Support both of these options since the underlying PrefixTree supports both. Possibly * expand the EncodedSeeker to utilize them both. 
@@ -171,9 +169,20 @@ public class PrefixTreeSeeker implements EncodedSeeker { // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); + return seekToOrBeforeUsingPositionAtOrBefore(kv, seekBefore); + } + + /* + * Support both of these options since the underlying PrefixTree supports + * both. Possibly expand the EncodedSeeker to utilize them both. + */ + + protected int seekToOrBeforeUsingPositionAtOrBefore(Cell kv, boolean seekBefore) { + // kv is already a Cell, which is what the CellSearcher interface wants, + // so it is handed to the searcher as-is CellScannerPosition position = ptSearcher.seekForwardToOrBefore(kv); - if(CellScannerPosition.AT == position){ + if (CellScannerPosition.AT == position) { if (seekBefore) { ptSearcher.previous(); return 1; @@ -184,16 +193,19 @@ public class PrefixTreeSeeker implements EncodedSeeker { return 1; } - protected int seekToOrBeforeUsingPositionAtOrAfter(byte[] keyOnlyBytes, int offset, int length, - boolean seekBefore){ - // this does a deep copy of the key byte[] because the CellSearcher interface wants a Cell + boolean seekBefore) { + // this does a deep copy of the key byte[] because the CellSearcher + // interface wants a Cell KeyValue kv = KeyValue.createKeyValueFromKey(keyOnlyBytes, offset, length); + return seekToOrBeforeUsingPositionAtOrAfter(kv, seekBefore); + } - //should probably switch this to use the seekForwardToOrBefore method + protected int seekToOrBeforeUsingPositionAtOrAfter(Cell kv, boolean seekBefore) { + // should probably switch this to use the seekForwardToOrBefore method CellScannerPosition position = ptSearcher.seekForwardToOrAfter(kv); - if(CellScannerPosition.AT == position){ + if (CellScannerPosition.AT == position) { if (seekBefore) { ptSearcher.previous(); return 1; @@ -202,21 +214,21 @@ public class PrefixTreeSeeker implements EncodedSeeker { } - if(CellScannerPosition.AFTER == position){ -
if(!ptSearcher.isBeforeFirst()){ + if (CellScannerPosition.AFTER == position) { + if (!ptSearcher.isBeforeFirst()) { ptSearcher.previous(); } return 1; } - if(position == CellScannerPosition.AFTER_LAST){ + if (position == CellScannerPosition.AFTER_LAST) { if (seekBefore) { ptSearcher.previous(); } return 1; } - throw new RuntimeException("unexpected CellScannerPosition:"+position); + throw new RuntimeException("unexpected CellScannerPosition:" + position); } @Override @@ -225,4 +237,19 @@ public class PrefixTreeSeeker implements EncodedSeeker { ByteBuffer bb = getKeyDeepCopy(); return comparator.compareFlatKey(key, offset, length, bb.array(), bb.arrayOffset(), bb.limit()); } + + @Override + public int seekToKeyInBlock(Cell key, boolean forceBeforeOnExactMatch) { + if (USE_POSITION_BEFORE) { + return seekToOrBeforeUsingPositionAtOrBefore(key, forceBeforeOnExactMatch); + }else{ + return seekToOrBeforeUsingPositionAtOrAfter(key, forceBeforeOnExactMatch); + } + } + + @Override + public int compareKey(KVComparator comparator, Cell key) { + ByteBuffer bb = getKeyDeepCopy(); + return comparator.compare(key, KeyValue.createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit())); + } } diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java index 736f330..2ad8a1f 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java @@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.Scan; @@ -56,6 +57,8 @@ public class HalfStoreFileReader extends 
StoreFile.Reader { // i.e. empty column and a timestamp of LATEST_TIMESTAMP. protected final byte [] splitkey; + protected final Cell splitCell; + private byte[] firstKey = null; private boolean firstKeySeeked = false; @@ -79,6 +82,7 @@ public class HalfStoreFileReader extends StoreFile.Reader { // have an actual midkey themselves. No midkey is how we indicate file is // not splittable. this.splitkey = r.getSplitKey(); + this.splitCell = KeyValue.createKeyValueFromKey(this.splitkey); // Is it top or bottom half? this.top = Reference.isTopFileRegion(r.getFileRegion()); } @@ -104,6 +108,7 @@ public class HalfStoreFileReader extends StoreFile.Reader { // have an actual midkey themselves. No midkey is how we indicate file is // not splittable. this.splitkey = r.getSplitKey(); + this.splitCell = KeyValue.createKeyValueFromKey(this.splitkey); // Is it top or bottom half? this.top = Reference.isTopFileRegion(r.getFileRegion()); } @@ -168,33 +173,21 @@ public class HalfStoreFileReader extends StoreFile.Reader { return true; } + @Override public boolean seekBefore(byte[] key) throws IOException { return seekBefore(key, 0, key.length); } + @Override public boolean seekBefore(byte [] key, int offset, int length) throws IOException { - if (top) { - byte[] fk = getFirstKey(); - // This will be null when the file is empty in which we can not seekBefore to any key - if (fk == null) return false; - if (getComparator().compareFlatKey(key, offset, length, fk, 0, - fk.length) <= 0) { - return false; - } - } else { - // The equals sign isn't strictly necessary just here to be consistent with seekTo - if (getComparator().compareFlatKey(key, offset, length, splitkey, 0, - splitkey.length) >= 0) { - return this.delegate.seekBefore(splitkey, 0, splitkey.length); - } - } - return this.delegate.seekBefore(key, offset, length); + return seekBefore(KeyValue.createKeyValueFromKey(key, offset, length)); } + @Override public boolean seekTo() throws IOException { if (top) { - int r = 
this.delegate.seekTo(splitkey); + int r = this.delegate.seekTo(KeyValue.createKeyValueFromKey(splitkey)); if (r == HConstants.INDEX_KEY_MAGIC) { return true; } @@ -219,55 +212,75 @@ public class HalfStoreFileReader extends StoreFile.Reader { splitkey, 0, splitkey.length) < 0; } + @Override public int seekTo(byte[] key) throws IOException { return seekTo(key, 0, key.length); } + @Override public int seekTo(byte[] key, int offset, int length) throws IOException { + return seekTo(KeyValue.createKeyValueFromKey(key, offset, length)); + } + + @Override + public int reseekTo(byte[] key) throws IOException { + return reseekTo(key, 0, key.length); + } + + @Override + public int reseekTo(byte[] key, int offset, int length) + throws IOException { + //This function is identical to the corresponding seekTo function except + //that we call reseekTo (and not seekTo) on the delegate. + return reseekTo(KeyValue.createKeyValueFromKey(key, offset, length)); + } + + public org.apache.hadoop.hbase.io.hfile.HFile.Reader getReader() { + return this.delegate.getReader(); + } + + public boolean isSeeked() { + return this.delegate.isSeeked(); + } + + @Override + public int seekTo(Cell key) throws IOException { if (top) { - if (getComparator().compareFlatKey(key, offset, length, splitkey, 0, - splitkey.length) < 0) { + if (getComparator().compareFlatKey(key, splitCell) < 0) { return -1; } } else { - if (getComparator().compareFlatKey(key, offset, length, splitkey, 0, - splitkey.length) >= 0) { + if (getComparator().compareFlatKey(key, splitCell) >= 0) { // we would place the scanner in the second half. // it might be an error to return false here ever... 
- boolean res = delegate.seekBefore(splitkey, 0, splitkey.length); + boolean res = delegate.seekBefore(splitCell); if (!res) { - throw new IOException("Seeking for a key in bottom of file, but key exists in top of file, failed on seekBefore(midkey)"); + throw new IOException( + "Seeking for a key in bottom of file, but key exists in top of file, failed on seekBefore(midkey)"); } return 1; } } - return delegate.seekTo(key, offset, length); + return delegate.seekTo(key); } @Override - public int reseekTo(byte[] key) throws IOException { - return reseekTo(key, 0, key.length); - } - - @Override - public int reseekTo(byte[] key, int offset, int length) - throws IOException { - //This function is identical to the corresponding seekTo function except - //that we call reseekTo (and not seekTo) on the delegate. + public int reseekTo(Cell key) throws IOException { + // This function is identical to the corresponding seekTo function + // except + // that we call reseekTo (and not seekTo) on the delegate. if (top) { - if (getComparator().compareFlatKey(key, offset, length, splitkey, 0, - splitkey.length) < 0) { + if (getComparator().compareFlatKey(key, splitCell) < 0) { return -1; } } else { - if (getComparator().compareFlatKey(key, offset, length, splitkey, 0, - splitkey.length) >= 0) { + if (getComparator().compareFlatKey(key, splitCell) >= 0) { // we would place the scanner in the second half. // it might be an error to return false here ever... - boolean res = delegate.seekBefore(splitkey, 0, splitkey.length); + boolean res = delegate.seekBefore(splitCell); if (!res) { - throw new IOException("Seeking for a key in bottom of file, but" + - " key exists in top of file, failed on seekBefore(midkey)"); + throw new IOException("Seeking for a key in bottom of file, but" + + " key exists in top of file, failed on seekBefore(midkey)"); } return 1; } @@ -276,15 +289,28 @@ public class HalfStoreFileReader extends StoreFile.Reader { // skip the 'reseek' and just return 1. 
return 1; } - return delegate.reseekTo(key, offset, length); + return delegate.reseekTo(key); } - public org.apache.hadoop.hbase.io.hfile.HFile.Reader getReader() { - return this.delegate.getReader(); - } - - public boolean isSeeked() { - return this.delegate.isSeeked(); + @Override + public boolean seekBefore(Cell key) throws IOException { + if (top) { + byte[] fk = getFirstKey(); + // getFirstKey() returns null when the file is empty, in which case we can not + // seekBefore to any key; check it before wrapping it in a Cell + if (fk == null) + return false; + if (getComparator().compareFlatKey(key, KeyValue.createKeyValueFromKey(fk)) <= 0) { + return false; + } + } else { + // The equals sign isn't strictly necessary just here to be consistent + // with seekTo + if (getComparator().compareFlatKey(key, splitCell) >= 0) { + return this.delegate.seekBefore(splitCell); + } + } + return this.delegate.seekBefore(key); } }; } diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java index 9575406..4f7b04f 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java @@ -36,6 +36,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; @@ -184,7 +185,146 @@ public class HFileBlockIndex { return blockWithScanInfo.getHFileBlock(); } } + + public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks, + boolean pread, boolean isCompaction) + throws IOException { + BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
cacheBlocks, + pread, isCompaction); + if (blockWithScanInfo == null) { + return null; + } else { + return blockWithScanInfo.getHFileBlock(); + } + } + + /** + * Return the BlockWithScanInfo which contains the DataBlock with other scan + * info such as nextIndexedKey. This function will only be called when the + * HFile version is larger than 1. + * + * @param key + * the key we are looking for + * @param keyOffset + * the offset of the key in its byte array + * @param keyLength + * the length of the key + * @param currentBlock + * the current block, to avoid re-reading the same block + * @param cacheBlocks + * @param pread + * @param isCompaction + * @return the BlockWithScanInfo which contains the DataBlock with other + * scan info such as nextIndexedKey. + * @throws IOException + */ + public BlockWithScanInfo loadDataBlockWithScanInfo(final byte[] key, int keyOffset, + int keyLength, HFileBlock currentBlock, boolean cacheBlocks, + boolean pread, boolean isCompaction) + throws IOException { + int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength); + if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) { + return null; + } + return loadDataBlockWithScanInfoInternals(key, keyOffset, keyLength, currentBlock, + cacheBlocks, pread, isCompaction, rootLevelIndex); + } + + private BlockWithScanInfo loadDataBlockWithScanInfoInternals(final byte[] key, int keyOffset, + int keyLength, HFileBlock currentBlock, boolean cacheBlocks, boolean pread, + boolean isCompaction, int rootLevelIndex) throws IOException { + // the next indexed key + byte[] nextIndexedKey = null; + + // Read the next-level (intermediate or leaf) index block. 
+ long currentOffset = blockOffsets[rootLevelIndex]; + int currentOnDiskSize = blockDataSizes[rootLevelIndex]; + + if (rootLevelIndex < blockKeys.length - 1) { + nextIndexedKey = blockKeys[rootLevelIndex + 1]; + } else { + nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY; + } + + int lookupLevel = 1; // How many levels deep we are in our lookup. + int index = -1; + + HFileBlock block; + while (true) { + + if (currentBlock != null && currentBlock.getOffset() == currentOffset) + { + // Avoid reading the same block again, even with caching turned off. + // This is crucial for compaction-type workload which might have + // caching turned off. This is like a one-block cache inside the + // scanner. + block = currentBlock; + } else { + // Call HFile's caching block reader API. We always cache index + // blocks, otherwise we might get terrible performance. + boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel); + BlockType expectedBlockType; + if (lookupLevel < searchTreeLevel - 1) { + expectedBlockType = BlockType.INTERMEDIATE_INDEX; + } else if (lookupLevel == searchTreeLevel - 1) { + expectedBlockType = BlockType.LEAF_INDEX; + } else { + // this also accounts for ENCODED_DATA + expectedBlockType = BlockType.DATA; + } + block = cachingBlockReader.readBlock(currentOffset, + currentOnDiskSize, shouldCache, pread, isCompaction, + expectedBlockType); + } + + if (block == null) { + throw new IOException("Failed to read block at offset " + + currentOffset + ", onDiskSize=" + currentOnDiskSize); + } + + // Found a data block, break the loop and check our level in the tree. + if (block.getBlockType().isData()) { + break; + } + + // Not a data block. This must be a leaf-level or intermediate-level + // index block. We don't allow going deeper than searchTreeLevel. 
+ if (++lookupLevel > searchTreeLevel) { + throw new IOException("Search Tree Level overflow: lookupLevel="+ + lookupLevel + ", searchTreeLevel=" + searchTreeLevel); + } + // Locate the entry corresponding to the given key in the non-root + // (leaf or intermediate-level) index block. + ByteBuffer buffer = block.getBufferWithoutHeader(); + index = locateNonRootIndexEntry(buffer, key, keyOffset, keyLength, comparator); + if (index == -1) { + throw new IOException("The key " + + Bytes.toStringBinary(key, keyOffset, keyLength) + + " is before the" + " first key of the non-root index block " + + block); + } + + currentOffset = buffer.getLong(); + currentOnDiskSize = buffer.getInt(); + + // Only update next indexed key if there is a next indexed key in the current level + byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1); + if (tmpNextIndexedKey != null) { + nextIndexedKey = tmpNextIndexedKey; + } + } + + if (lookupLevel != searchTreeLevel) { + throw new IOException("Reached a data block at level " + lookupLevel + + " but the number of levels is " + searchTreeLevel); + } + + // set the next indexed key for the current block. + BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey); + return blockWithScanInfo; + } + /** * Return the BlockWithScanInfo which contains the DataBlock with other scan info * such as nextIndexedKey. @@ -202,11 +342,10 @@ public class HFileBlockIndex { * such as nextIndexedKey. 
* @throws IOException */ - public BlockWithScanInfo loadDataBlockWithScanInfo(final byte[] key, int keyOffset, - int keyLength, HFileBlock currentBlock, boolean cacheBlocks, + public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock, boolean cacheBlocks, boolean pread, boolean isCompaction) throws IOException { - int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength); + int rootLevelIndex = rootBlockContainingKey(key); if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) { return null; } @@ -275,10 +414,11 @@ public class HFileBlockIndex { // Locate the entry corresponding to the given key in the non-root // (leaf or intermediate-level) index block. ByteBuffer buffer = block.getBufferWithoutHeader(); - index = locateNonRootIndexEntry(buffer, key, keyOffset, keyLength, comparator); + index = locateNonRootIndexEntry(buffer, key, comparator); if (index == -1) { + // This has to be changed throw new IOException("The key " - + Bytes.toStringBinary(key, keyOffset, keyLength) + + Bytes.toStringBinary(key.getRowArray(), key.getRowOffset(), key.getRowLength()) + " is before the" + " first key of the non-root index block " + block); } @@ -411,6 +551,36 @@ public class HFileBlockIndex { } /** + * Finds the root-level index block containing the given key. + * + * @param key + * Key to find + * @return Offset of block containing key (between 0 and the + * number of blocks - 1) or -1 if this file does not contain the + * request. + */ + public int rootBlockContainingKey(final Cell key) { + int pos = Bytes.binarySearch(blockKeys, key, comparator); + // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see + // binarySearch's javadoc. + + if (pos >= 0) { + // This means this is an exact match with an element of blockKeys. + assert pos < blockKeys.length; + return pos; + } + + // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i], + // and i is in [0, blockKeys.length]. 
We are returning j = i - 1 such that + // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if + // key < blockKeys[0], meaning the file does not contain the given key. + + int i = -pos - 1; + assert 0 <= i && i <= blockKeys.length; + return i - 1; + } + + /** * Adds a new entry in the root block index. Only used when reading. * * @param key Last key in the block @@ -550,6 +720,98 @@ public class HFileBlockIndex { } /** + * Performs a binary search over a non-root level index block. Utilizes the + * secondary index, which records the offsets of (offset, onDiskSize, + * firstKey) tuples of all entries. + * + * @param key + * the cell whose key we are searching for among the entries of + * the blockIndex buffer + * @param nonRootIndex + * the non-root index block buffer, starting with the secondary + * index. The position is ignored. + * @param comparator + * the comparator used for the search; the search key is always + * passed as its left operand and the candidate index key as its + * right operand + * @return the index i in [0, numEntries - 1] such that keys[i] <= key < + * keys[i + 1], if keys is the array of all keys being searched, or + * -1 otherwise + * @throws IllegalStateException if the binary search invariant is violated + */ + static int binarySearchNonRootIndex(Cell key, ByteBuffer nonRootIndex, KVComparator comparator) { + + int numEntries = nonRootIndex.getInt(0); + int low = 0; + int high = numEntries - 1; + int mid = 0; + + // Entries start after the number of entries and the secondary index. + // The secondary index takes numEntries + 1 ints. + int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2); + + // If we imagine that keys[-1] = -Infinity and + // keys[numEntries] = Infinity, then we are maintaining an invariant that + // keys[low - 1] < key < keys[high + 1] while narrowing down the range.
+ + while (low <= high) { + mid = (low + high) >>> 1; + + // Midkey's offset relative to the end of secondary index + int midKeyRelOffset = nonRootIndex.getInt( + Bytes.SIZEOF_INT * (mid + 1)); + + // The offset of the middle key in the blockIndex buffer + int midKeyOffset = entriesOffset // Skip secondary index + + midKeyRelOffset // Skip all entries until mid + + SECONDARY_INDEX_ENTRY_OVERHEAD; // Skip offset and on-disk-size + + // We subtract the two consecutive secondary index elements, which + // gives us the size of the whole (offset, onDiskSize, key) tuple. We + // then need to subtract the overhead of offset and onDiskSize. + int midLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (mid + 2)) - + midKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD; + + // we have to compare in this order, because the comparator order + // has special logic when the 'left side' is a special key. + Cell nonRootIndexKV = KeyValue.createKeyValueFromKey(nonRootIndex.array(), + nonRootIndex.arrayOffset() + midKeyOffset, midLength); + int cmp = comparator.compareFlatKey(key, nonRootIndexKV); + + // key lives above the midpoint + if (cmp > 0) + low = mid + 1; // Maintain the invariant that keys[low - 1] < key + // key lives below the midpoint + else if (cmp < 0) + high = mid - 1; // Maintain the invariant that key < keys[high + 1] + else + return mid; // exact match + } + + // As per our invariant, keys[low - 1] < key < keys[high + 1], meaning + // that low - 1 < high + 1 and (low - high) <= 1. As per the loop break + // condition, low >= high + 1. Therefore, low = high + 1. + + if (low != high + 1) { + throw new IllegalStateException("Binary search broken: low=" + low + + " " + "instead of " + (high + 1)); + } + + // OK, our invariant says that keys[low - 1] < key < keys[low]. We need to + // return i such that keys[i] <= key < keys[i + 1]. Therefore i = low - 1. + int i = low - 1; + + // Some extra validation on the result. 
+ if (i < -1 || i >= numEntries) { + throw new IllegalStateException("Binary search broken: result is " + + i + " but expected to be between -1 and (numEntries - 1) = " + + (numEntries - 1)); + } + + return i; + } + + /** * Search for one key using the secondary index in a non-root block. In case * of success, positions the provided buffer at the entry of interest, where * the file offset and the on-disk-size can be read. @@ -586,6 +848,43 @@ public class HFileBlockIndex { } /** + * Search for one key using the secondary index in a non-root block. In case + * of success, positions the provided buffer at the entry of interest, where + * the file offset and the on-disk-size can be read. + * + * @param nonRootBlock + * a non-root block without header. Initial position does not + * matter. + * @param key + * the cell containing the key to locate + * @param comparator + * the comparator handed to binarySearchNonRootIndex to order + * the search key against the index keys stored in the + * non-root block + * @return the index position where the given key was found, otherwise + * return -1 in the case the given key is before the first key. + * + */ + static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, Cell key, KVComparator comparator) { + int entryIndex = binarySearchNonRootIndex(key, nonRootBlock, comparator); + + if (entryIndex != -1) { + int numEntries = nonRootBlock.getInt(0); + + // The end of secondary index and the beginning of entries themselves. + int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2); + + // The offset of the entry we are interested in relative to the end of + // the secondary index. + int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT * (1 + entryIndex)); + + nonRootBlock.position(entriesOffset + entryRelOffset); + } + + return entryIndex; + } + + /** * Read in the root-level index from the given input stream.
Must match * what was written into the root level by * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java index acbeed2..45e0592 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; @@ -466,6 +467,8 @@ public class HFileReaderV2 extends AbstractHFileReader { */ protected byte[] nextIndexedKey; + protected Cell nextIndexedCell; + public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks, final boolean pread, final boolean isCompaction) { super(r, cacheBlocks, pread, isCompaction); @@ -504,10 +507,14 @@ public class HFileReaderV2 extends AbstractHFileReader { protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock); + @Deprecated protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, boolean rewind, byte[] key, int offset, int length, boolean seekBefore) throws IOException; + protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, + boolean rewind, Cell key, boolean seekBefore) throws IOException; + @Override public int seekTo(byte[] key, int offset, int length) throws IOException { // Always rewind to the first key of the block, because the given key @@ -543,19 +550,68 @@ public class HFileReaderV2 extends AbstractHFileReader { } @Override - public boolean seekBefore(byte[] key, int offset, int length) - 
throws IOException { - HFileBlock seekToBlock = - reader.getDataBlockIndexReader().seekToDataBlock(key, offset, length, - block, cacheBlocks, pread, isCompaction); + public int seekTo(Cell key) throws IOException { + return seekTo(key, true); + } + + @Override + public int reseekTo(Cell key) throws IOException { + int compared; + if (isSeeked()) { + compared = compareKey(reader.getComparator(), key); + if (compared < 1) { + // If the required key is less than or equal to current key, then + // don't do anything. + return compared; + } else { + // The comparison with no_next_index_key has to be checked + if (this.nextIndexedKey != null && + (this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY || reader + .getComparator().compareFlatKey(key, KeyValue.createKeyValueFromKey(nextIndexedKey)) < 0)) { + // The reader shall continue to scan the current data block instead + // of querying the + // block index as long as it knows the target key is strictly + // smaller than + // the next indexed key or the current data block is the last data + // block. + return loadBlockAndSeekToKey(this.block, nextIndexedKey, false, key, false); + } + } + } + // Don't rewind on a reseek operation, because reseek implies that we are + // always going forward in the file. + return seekTo(key, false); + } + + public int seekTo(Cell key, boolean rewind) throws IOException { + /* + * Locate the data block that may contain the key through the block index, + * then position the scanner inside that block. A null lookup result means + * the key falls before the beginning of the file. + */ + HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader(); + BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, block, + cacheBlocks, pread, isCompaction); + if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) { + // This happens if the key e.g. falls before the beginning of the file.
+ return -1; + } + // The raw next-indexed key is handed on below; no Cell wrapper is needed for it. + return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(), blockWithScanInfo.getNextIndexedKey(), rewind, key, + false); + } + + @Override + public boolean seekBefore(byte[] key, int offset, int length) throws IOException { + HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, offset, + length, block, cacheBlocks, pread, isCompaction); if (seekToBlock == null) { return false; } ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock); - if (reader.getComparator().compareFlatKey(firstKey.array(), - firstKey.arrayOffset(), firstKey.limit(), key, offset, length) >= 0) - { + if (reader.getComparator().compareFlatKey(firstKey.array(), firstKey.arrayOffset(), + firstKey.limit(), key, offset, length) >= 0) { long previousBlockOffset = seekToBlock.getPrevBlockOffset(); // The key we are interested in if (previousBlockOffset == -1) { @@ -566,9 +622,8 @@ public class HFileReaderV2 extends AbstractHFileReader { // It is important that we compute and pass onDiskSize to the block // reader so that it does not have to read the header separately to // figure out the size. - seekToBlock = reader.readBlock(previousBlockOffset, - seekToBlock.getOffset() - previousBlockOffset, cacheBlocks, - pread, isCompaction, BlockType.DATA); + seekToBlock = reader.readBlock(previousBlockOffset, seekToBlock.getOffset() + - previousBlockOffset, cacheBlocks, pread, isCompaction, BlockType.DATA); // TODO shortcut: seek forward in this block to the last key of the // block.
} @@ -577,6 +632,38 @@ public class HFileReaderV2 extends AbstractHFileReader { return true; } + @Override + public boolean seekBefore(Cell key) throws IOException { + HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, block, + cacheBlocks, pread, isCompaction); + if (seekToBlock == null) { + return false; + } + ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock); + + if (reader.getComparator() + .compareFlatKey( + KeyValue.createKeyValueFromKey(firstKey.array(), firstKey.arrayOffset(), + firstKey.limit()), key) >= 0) { + long previousBlockOffset = seekToBlock.getPrevBlockOffset(); + // The key we are interested in + if (previousBlockOffset == -1) { + // we have a 'problem', the key we want is the first of the file. + return false; + } + + // It is important that we compute and pass onDiskSize to the block + // reader so that it does not have to read the header separately to + // figure out the size. + seekToBlock = reader.readBlock(previousBlockOffset, seekToBlock.getOffset() + - previousBlockOffset, cacheBlocks, pread, isCompaction, BlockType.DATA); + // TODO shortcut: seek forward in this block to the last key of the + // block. 
+ } + byte[] firstKeyInCurrentBlock = Bytes.getBytes(firstKey); + loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, true); + return true; + } /** * Scans blocks in the "scanned" section of the {@link HFile} until the next @@ -620,6 +707,8 @@ public class HFileReaderV2 extends AbstractHFileReader { */ public abstract int compareKey(KVComparator comparator, byte[] key, int offset, int length); + + public abstract int compareKey(KVComparator comparator, Cell kv); } /** @@ -786,6 +875,20 @@ public class HFileReaderV2 extends AbstractHFileReader { return blockSeek(key, offset, length, seekBefore); } + @Override + protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, + boolean rewind, Cell key, boolean seekBefore) throws IOException { + if (block == null || block.getOffset() != seekToBlock.getOffset()) { + updateCurrBlock(seekToBlock); + } else if (rewind) { + blockBuffer.rewind(); + } + + // Update the nextIndexedKey + this.nextIndexedKey = nextIndexedKey; + return blockSeek(key, seekBefore); + } + /** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. @@ -810,6 +913,7 @@ public class HFileReaderV2 extends AbstractHFileReader { // Reset the next indexed key this.nextIndexedKey = null; + this.nextIndexedCell = null; } protected void readKeyValueLen() { @@ -938,6 +1042,96 @@ public class HFileReaderV2 extends AbstractHFileReader { return 1; // didn't exactly find it. } + /** + * Within a loaded block, seek looking for the last key that is smaller than + * (or equal to?) the key we are interested in. + * + * A note on the seekBefore: if you have seekBefore = true, AND the first + * key in the block = key, then you'll get thrown exceptions. The caller has + * to check for that case and load the previous block as appropriate. + * + * @param key + * the key to find + * @param seekBefore + * find the key before the given key in case of exact match. 
+ * @return 0 in case of an exact key match, 1 in case of an inexact match, + * -2 in case of an inexact match and furthermore, the input key + * less than the first key of current block(e.g. using a faked index + * key) + */ + protected int blockSeek(Cell key, boolean seekBefore) { + int klen, vlen; + long memstoreTS = 0; + int memstoreTSLen = 0; + int lastKeyValueSize = -1; + do { + blockBuffer.mark(); + klen = blockBuffer.getInt(); + vlen = blockBuffer.getInt(); + blockBuffer.reset(); + if (this.reader.shouldIncludeMemstoreTS()) { + if (this.reader.decodeMemstoreTS) { + try { + int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position() + + KEY_VALUE_LEN_SIZE + klen + vlen; + memstoreTS = Bytes.readVLong(blockBuffer.array(), memstoreTSOffset); + memstoreTSLen = WritableUtils.getVIntSize(memstoreTS); + } catch (Exception e) { + throw new RuntimeException("Error reading memstore timestamp", e); + } + } else { + memstoreTS = 0; + memstoreTSLen = 1; + } + } + + int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + KEY_VALUE_LEN_SIZE; + int comp = reader.getComparator().compareFlatKey(key, + KeyValue.createKeyValueFromKey(blockBuffer.array(), keyOffset, klen)); + + if (comp == 0) { + if (seekBefore) { + if (lastKeyValueSize < 0) { + throw new IllegalStateException("blockSeek with seekBefore " + + "at the first key of the block: key=" + Bytes.toStringBinary(key.getRowArray()) + + ", blockOffset=" + block.getOffset() + ", onDiskSize=" + + block.getOnDiskSizeWithHeader()); + } + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + return 1; // non exact match. 
+ } + currKeyLen = klen; + currValueLen = vlen; + if (this.reader.shouldIncludeMemstoreTS()) { + currMemstoreTS = memstoreTS; + currMemstoreTSLen = memstoreTSLen; + } + return 0; // indicate exact match + } else if (comp < 0) { + if (lastKeyValueSize > 0) + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + if (lastKeyValueSize == -1 && blockBuffer.position() == 0 + && this.reader.trailer.getMinorVersion() >= MINOR_VERSION_WITH_FAKED_KEY) { + return HConstants.INDEX_KEY_MAGIC; + } + return 1; + } + + // The size of this key/value tuple, including key/value length fields. + lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE; + blockBuffer.position(blockBuffer.position() + lastKeyValueSize); + } while (blockBuffer.remaining() > 0); + + // Seek to the last key we successfully read. This will happen if this is + // the last key/value pair in the file, in which case the following call + // to next() has to return false. + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + return 1; // didn't exactly find it. 
+ } + @Override protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) { ByteBuffer buffer = curBlock.getBufferWithoutHeader(); @@ -964,6 +1158,14 @@ public class HFileReaderV2 extends AbstractHFileReader { + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen, currValueLen); } + + @Override + public int compareKey(KVComparator comparator, Cell key) { + return comparator.compareFlatKey( + key, + KeyValue.createKeyValueFromKey(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen)); + } } /** @@ -1019,6 +1221,7 @@ public class HFileReaderV2 extends AbstractHFileReader { // Reset the next indexed key this.nextIndexedKey = null; + this.nextIndexedCell = null; } private ByteBuffer getEncodedBuffer(HFileBlock newBlock) { @@ -1133,6 +1336,23 @@ public class HFileReaderV2 extends AbstractHFileReader { this.nextIndexedKey = nextIndexedKey; return seeker.seekToKeyInBlock(key, offset, length, seekBefore); } + + @Override + protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey, + boolean rewind, Cell key, boolean seekBefore) throws IOException { + if (block == null || block.getOffset() != seekToBlock.getOffset()) { + updateCurrentBlock(seekToBlock); + } else if (rewind) { + seeker.rewind(); + } + this.nextIndexedKey = nextIndexedKey; + return seeker.seekToKeyInBlock(key, seekBefore); + } + + @Override + public int compareKey(KVComparator comparator, Cell key) { + return seeker.compareKey(comparator, key); + } } /** diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java index 4ed56c6..35aae26 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java @@ -26,7 +26,9 @@ import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.crypto.Cipher; @@ -244,7 +246,121 @@ public class HFileReaderV3 extends HFileReaderV2 { * less than the first key of current block(e.g. using a faked index * key) */ - protected int blockSeek(byte[] key, int offset, int length, boolean seekBefore) { + @Override + protected int blockSeek(Cell key, boolean seekBefore) { + int klen, vlen, tlen = 0; + long memstoreTS = 0; + int memstoreTSLen = 0; + int lastKeyValueSize = -1; + do { + blockBuffer.mark(); + klen = blockBuffer.getInt(); + vlen = blockBuffer.getInt(); + if (klen < 0 || vlen < 0 || klen > blockBuffer.limit() + || vlen > blockBuffer.limit()) { + throw new IllegalStateException("Invalid klen " + klen + " or vlen " + + vlen + ". Block offset: " + + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + + blockBuffer.position() + " (without header)."); + } + ByteBufferUtils.skip(blockBuffer, klen + vlen); + if (reader.hfileContext.isIncludesTags()) { + tlen = blockBuffer.getShort(); + if (tlen < 0 || tlen > blockBuffer.limit()) { + throw new IllegalStateException("Invalid tlen " + tlen + ". 
Block offset: " + + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + + blockBuffer.position() + " (without header)."); + } + ByteBufferUtils.skip(blockBuffer, tlen); + } + if (this.reader.shouldIncludeMemstoreTS()) { + if (this.reader.decodeMemstoreTS) { + try { + memstoreTS = Bytes.readVLong(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position()); + memstoreTSLen = WritableUtils.getVIntSize(memstoreTS); + } catch (Exception e) { + throw new RuntimeException("Error reading memstore timestamp", e); + } + } else { + memstoreTS = 0; + memstoreTSLen = 1; + } + } + blockBuffer.reset(); + int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2); + int comp = reader.getComparator().compareFlatKey(key, + KeyValue.createKeyValueFromKey(blockBuffer.array(), keyOffset, klen)); + + if (comp == 0) { + if (seekBefore) { + if (lastKeyValueSize < 0) { + throw new IllegalStateException("blockSeek with seekBefore " + + "at the first key of the block: key=" + Bytes.toStringBinary(key.getRowArray()) + + ", blockOffset=" + block.getOffset() + ", onDiskSize=" + + block.getOnDiskSizeWithHeader()); + } + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + return 1; // non exact match. + } + currKeyLen = klen; + currValueLen = vlen; + currTagsLen = tlen; + if (this.reader.shouldIncludeMemstoreTS()) { + currMemstoreTS = memstoreTS; + currMemstoreTSLen = memstoreTSLen; + } + return 0; // indicate exact match + } else if (comp < 0) { + if (lastKeyValueSize > 0) + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + if (lastKeyValueSize == -1 && blockBuffer.position() == 0) { + return HConstants.INDEX_KEY_MAGIC; + } + return 1; + } + + // The size of this key/value tuple, including key/value length fields. 
+ lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE; + // include tag length also if tags included with KV + if (reader.hfileContext.isIncludesTags()) { + lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT; + } + blockBuffer.position(blockBuffer.position() + lastKeyValueSize); + } while (blockBuffer.remaining() > 0); + + // Seek to the last key we successfully read. This will happen if this is + // the last key/value pair in the file, in which case the following call + // to next() has to return false. + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + return 1; // didn't exactly find it. + } + + /** + * Within a loaded block, seek looking for the last key that is smaller than + * (or equal to?) the key we are interested in. + * A note on the seekBefore: if you have seekBefore = true, AND the first + * key in the block = key, then you'll get thrown exceptions. The caller has + * to check for that case and load the previous block as appropriate. + * @param key + * the key to find + * @param seekBefore + * find the key before the given key in case of exact match. + * @param offset + * Offset to find the key in the given bytebuffer + * @param length + * Length of the key to be found + * @return 0 in case of an exact key match, 1 in case of an inexact match, + * -2 in case of an inexact match and furthermore, the input key + * less than the first key of current block(e.g. 
using a faked index + * key) + */ + @Override + protected int blockSeek(byte[] key, int offset, int length, + boolean seekBefore) { int klen, vlen, tlen = 0; long memstoreTS = 0; int memstoreTSLen = 0; @@ -286,8 +402,8 @@ public class HFileReaderV3 extends HFileReaderV2 { } blockBuffer.reset(); int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2); - int comp = reader.getComparator().compare(key, offset, length, blockBuffer.array(), - keyOffset, klen); + int comp = reader.getComparator().compareFlatKey(key, offset, length, + blockBuffer.array(), keyOffset, klen); if (comp == 0) { if (seekBefore) { diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java index 0e353ef..840a78c 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.KeyValue; /** @@ -54,8 +55,12 @@ public interface HFileScanner { * false when it is called. * @throws IOException */ + @Deprecated int seekTo(byte[] key) throws IOException; + @Deprecated int seekTo(byte[] key, int offset, int length) throws IOException; + + int seekTo(Cell kv) throws IOException; /** * Reseek to or just before the passed key. Similar to seekTo * except that this can be called even if the scanner is not at the beginning @@ -76,8 +81,12 @@ public interface HFileScanner { * 1, such that k[i] < key, and scanner is left in position i. 
* @throws IOException */ + @Deprecated int reseekTo(byte[] key) throws IOException; + @Deprecated int reseekTo(byte[] key, int offset, int length) throws IOException; + + int reseekTo(Cell kv) throws IOException; /** * Consider the key stream of all the keys in the file, * k[0] .. k[n], where there are n keys in the file. @@ -88,8 +97,12 @@ public interface HFileScanner { * return false (EOF). * @throws IOException */ + @Deprecated boolean seekBefore(byte[] key) throws IOException; + @Deprecated boolean seekBefore(byte[] key, int offset, int length) throws IOException; + + boolean seekBefore(Cell kv) throws IOException; /** * Positions this scanner at the start of the file. * @return False if empty file; i.e. a call to next would return false and diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index f7dfb17..805bf40 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1648,8 +1648,7 @@ public class HStore implements Store { KeyValue kv = firstOnRow; // If firstOnRow < firstKV, set to firstKV if (this.comparator.compareRows(firstKV, firstOnRow) == 0) kv = firstKV; - int result = scanner.seekTo(kv.getBuffer(), kv.getKeyOffset(), - kv.getKeyLength()); + int result = scanner.seekTo(kv); return result != -1; } diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index ecd1793..0440102 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -235,7 +235,7 @@ public class StoreFileScanner implements KeyValueScanner { */ public static boolean seekAtOrAfter(HFileScanner s, KeyValue k) 
throws IOException { - int result = s.seekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength()); + int result = s.seekTo(k); if(result < 0) { if (result == HConstants.INDEX_KEY_MAGIC) { // using faked key @@ -255,7 +255,7 @@ public class StoreFileScanner implements KeyValueScanner { static boolean reseekAtOrAfter(HFileScanner s, KeyValue k) throws IOException { //This function is similar to seekAtOrAfter function - int result = s.reseekTo(k.getBuffer(), k.getKeyOffset(), k.getKeyLength()); + int result = s.reseekTo(k); if (result <= 0) { if (result == HConstants.INDEX_KEY_MAGIC) { // using faked key diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java index 1e0b0e8..29ce3d2 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java @@ -33,7 +33,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; -import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileContext; @@ -127,12 +126,12 @@ public class TestHalfStoreFileReader { curr = scanner.getKeyValue(); KeyValue reseekKv = getLastOnCol(curr); - int ret = scanner.reseekTo(reseekKv.getKey()); + int ret = scanner.reseekTo(reseekKv); assertTrue("reseek to returned: " + ret, ret > 0); //System.out.println(curr + ": " + ret); } while (scanner.next()); - int ret = scanner.reseekTo(getLastOnCol(curr).getKey()); + int ret = scanner.reseekTo(getLastOnCol(curr)); //System.out.println("Last reseek: " + ret); assertTrue( ret > 0 ); @@ -221,7 +220,7 @@ public class TestHalfStoreFileReader { cacheConfig, bottom, 
TEST_UTIL.getConfiguration()); halfreader.loadFileInfo(); final HFileScanner scanner = halfreader.getScanner(false, false); - scanner.seekBefore(seekBefore.getKey()); + scanner.seekBefore(seekBefore); return scanner.getKeyValue(); } diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java index 20463f7..3aafb9d 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java @@ -247,16 +247,17 @@ public class TestHFile extends HBaseTestCase { // Align scanner at start of the file. scanner.seekTo(); readAllRecords(scanner); - scanner.seekTo(getSomeKey(50)); - assertTrue("location lookup failed", scanner.seekTo(getSomeKey(50)) == 0); + int seekTo = scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(50))); + System.out.println(seekTo); + assertTrue("location lookup failed", scanner.seekTo(KeyValue.createKeyValueFromKey(getSomeKey(50))) == 0); // read the key and see if it matches ByteBuffer readKey = scanner.getKey(); assertTrue("seeked key does not match", Arrays.equals(getSomeKey(50), Bytes.toBytes(readKey))); - scanner.seekTo(new byte[0]); + scanner.seekTo(KeyValue.createKeyValueFromKey(new byte[0])); ByteBuffer val1 = scanner.getValue(); - scanner.seekTo(new byte[0]); + scanner.seekTo(KeyValue.createKeyValueFromKey(new byte[0])); ByteBuffer val2 = scanner.getValue(); assertTrue(Arrays.equals(Bytes.toBytes(val1), Bytes.toBytes(val2))); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java index 7b77c15..7df509c 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java @@ -626,7 +626,7 @@ public class 
TestHFileBlockIndex { private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) throws IOException { assertEquals("Failed to seek to key #" + i + " (" - + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(keys[i])); + + Bytes.toStringBinary(keys[i]) + ")", 0, scanner.seekTo(KeyValue.createKeyValueFromKey(keys[i]))); } private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java index 3556b79..51868a1 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java @@ -17,7 +17,10 @@ */ package org.apache.hadoop.hbase.io.hfile; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -44,7 +47,6 @@ import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; - import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -177,7 +179,7 @@ public class TestHFileEncryption { cryptoContext.getKeyBytes())); } - @Test(timeout=60000) + @Test(timeout=6000000) public void testHFileEncryption() throws Exception { // Create 1000 random test KVs RedundantKVGenerator generator = new RedundantKVGenerator(); @@ -233,7 +235,7 @@ public class TestHFileEncryption { assertTrue("Initial seekTo failed", scanner.seekTo()); for (i = 0; i < 100; i++) { KeyValue kv = testKvs.get(RNG.nextInt(testKvs.size())); - assertEquals("Unable to find KV as 
expected: " + kv, scanner.seekTo(kv.getKey()), 0); + assertEquals("Unable to find KV as expected: " + kv, scanner.seekTo(kv), 0); } reader.close(); } diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java index fb9f183..23334ac 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java @@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; @@ -80,7 +81,7 @@ public class TestHFileInlineToRootChunkConversion { HFileReaderV2 reader = (HFileReaderV2) HFile.createReader(fs, hfPath, cacheConf, conf); HFileScanner scanner = reader.getScanner(true, true); for (int i = 0; i < keys.size(); ++i) { - scanner.seekTo(keys.get(i)); + scanner.seekTo(KeyValue.createKeyValueFromKey(keys.get(i))); } reader.close(); } diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java index f42cfc3..7310525 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java @@ -193,7 +193,7 @@ public class TestHFileSeek extends TestCase { kSampler.next(key); byte [] k = new byte [key.getLength()]; System.arraycopy(key.getBytes(), 0, k, 0, key.getLength()); - if (scanner.seekTo(k) >= 0) { + if (scanner.seekTo(KeyValue.createKeyValueFromKey(k)) >= 0) { ByteBuffer bbkey = scanner.getKey(); 
ByteBuffer bbval = scanner.getValue(); totalBytes += bbkey.limit(); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestNewReseekTo.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestNewReseekTo.java new file mode 100644 index 0000000..1f32ff0 --- /dev/null +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestNewReseekTo.java @@ -0,0 +1,124 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Test {@link HFileScanner#reseekTo(byte[])} + */ +@Category(SmallTests.class) +public class TestNewReseekTo { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @Test + public void testReseekTo() throws Exception { + testReseekToInternals(TagUsage.NO_TAG); + testReseekToInternals(TagUsage.ONLY_TAG); + testReseekToInternals(TagUsage.PARTIAL_TAG); + } + + private void testReseekToInternals(TagUsage tagUsage) throws IOException { + Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile"); + FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile); + if(tagUsage != TagUsage.NO_TAG){ + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } + CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration()); + HFileContext context = new HFileContextBuilder().withBlockSize(4000).build(); + HFile.Writer writer = HFile.getWriterFactory( + TEST_UTIL.getConfiguration(), cacheConf) + .withOutputStream(fout) + .withFileContext(context) + // NOTE: This test is dependent on this deprecated nonstandard comparator + .withComparator(new KeyValue.RawBytesComparator()) + .create(); + int numberOfKeys = 1000; + + String valueString = "Value"; + + List keyList = new ArrayList(); + List valueList = new ArrayList(); + + for (int key = 0; key < numberOfKeys; key++) { + String value = valueString + key; + 
keyList.add(key); + valueList.add(value); + if(tagUsage == TagUsage.NO_TAG){ + writer.append(Bytes.toBytes(key), Bytes.toBytes(value)); + } else if (tagUsage == TagUsage.ONLY_TAG) { + Tag t = new Tag((byte) 1, "myTag1"); + writer.append(Bytes.toBytes(key), Bytes.toBytes(value), t.getBuffer()); + } else { + if (key % 4 == 0) { + Tag t = new Tag((byte) 1, "myTag1"); + writer.append(Bytes.toBytes(key), Bytes.toBytes(value), t.getBuffer()); + } else { + writer.append(Bytes.toBytes(key), Bytes.toBytes(value), HConstants.EMPTY_BYTE_ARRAY); + } + } + } + writer.close(); + fout.close(); + + HFile.Reader reader = HFile.createReader(TEST_UTIL.getTestFileSystem(), + ncTFile, cacheConf, TEST_UTIL.getConfiguration()); + reader.loadFileInfo(); + HFileScanner scanner = reader.getScanner(false, true); + + scanner.seekTo(); + for (int i = 0; i < keyList.size(); i++) { + Integer key = keyList.get(i); + String value = valueList.get(i); + long start = System.nanoTime(); + scanner.seekTo(KeyValue.createKeyValueFromKey(Bytes.toBytes(key))); + assertEquals(value, scanner.getValueString()); + } + + scanner.seekTo(); + for (int i = 0; i < keyList.size(); i += 10) { + Integer key = keyList.get(i); + String value = valueList.get(i); + long start = System.nanoTime(); + scanner.reseekTo(KeyValue.createKeyValueFromKey(Bytes.toBytes(key))); + assertEquals("i is " + i, value, scanner.getValueString()); + } + + reader.close(); + } + + +} + diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestNewSeekTo.java hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestNewSeekTo.java new file mode 100644 index 0000000..cc11398 --- /dev/null +++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestNewSeekTo.java @@ -0,0 +1,296 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.experimental.categories.Category; + +/** + * Test {@link HFileScanner#seekTo(byte[])} and its variants. 
+ */ +@Category(SmallTests.class) +public class TestNewSeekTo extends HBaseTestCase { + + static boolean switchKVs = false; + + static KeyValue toKV(String row, TagUsage tagUsage) { + if (tagUsage == TagUsage.NO_TAG) { + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), + Bytes.toBytes("value")); + } else if (tagUsage == TagUsage.ONLY_TAG) { + Tag t = new Tag((byte) 1, "myTag1"); + Tag[] tags = new Tag[1]; + tags[0] = t; + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), + HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); + } else { + if (!switchKVs) { + switchKVs = true; + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), + Bytes.toBytes("qualifier"), Bytes.toBytes("value")); + } else { + switchKVs = false; + Tag t = new Tag((byte) 1, "myTag1"); + Tag[] tags = new Tag[1]; + tags[0] = t; + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), + Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); + } + } + } + static String toRowStr(KeyValue kv) { + return Bytes.toString(kv.getRow()); + } + + Path makeNewFile(TagUsage tagUsage) throws IOException { + Path ncTFile = new Path(this.testDir, "basic.hfile"); + if (tagUsage != TagUsage.NO_TAG) { + conf.setInt("hfile.format.version", 3); + } else { + conf.setInt("hfile.format.version", 2); + } + FSDataOutputStream fout = this.fs.create(ncTFile); + int blocksize = toKV("a", tagUsage).getLength() * 3; + HFileContext context = new HFileContextBuilder().withBlockSize(blocksize) + .withIncludesTags(true).build(); + HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withOutputStream(fout) + .withFileContext(context) + .withComparator(KeyValue.COMPARATOR).create(); + // 4 bytes * 3 * 2 for each key/value + + // 3 for keys, 15 for values = 42 (woot) + writer.append(toKV("c", tagUsage)); + writer.append(toKV("e", tagUsage)); + writer.append(toKV("g", tagUsage)); + // 
block transition + writer.append(toKV("i", tagUsage)); + writer.append(toKV("k", tagUsage)); + writer.close(); + fout.close(); + return ncTFile; + } + + /** Runs testSeekBeforeInternals once for each of TagUsage.NO_TAG, ONLY_TAG and PARTIAL_TAG. */ + public void testSeekBefore() throws Exception { + testSeekBeforeInternals(TagUsage.NO_TAG); + testSeekBeforeInternals(TagUsage.ONLY_TAG); + testSeekBeforeInternals(TagUsage.PARTIAL_TAG); + } + + /** Opens the file built by makeNewFile(tagUsage) and checks seekBefore: it must return false for "a" and "c", and for "d" through "l" it must return true and leave the scanner on the asserted earlier row. Closes the reader when done. */ + protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); + HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf); + reader.loadFileInfo(); + HFileScanner scanner = reader.getScanner(false, true); + assertEquals(false, scanner.seekBefore(toKV("a", tagUsage))); + + assertEquals(false, scanner.seekBefore(toKV("c", tagUsage))); + + assertEquals(true, scanner.seekBefore(toKV("d", tagUsage))); + assertEquals("c", toRowStr(scanner.getKeyValue())); + + assertEquals(true, scanner.seekBefore(toKV("e", tagUsage))); + assertEquals("c", toRowStr(scanner.getKeyValue())); + + assertEquals(true, scanner.seekBefore(toKV("f", tagUsage))); + assertEquals("e", toRowStr(scanner.getKeyValue())); + + assertEquals(true, scanner.seekBefore(toKV("g", tagUsage))); + assertEquals("e", toRowStr(scanner.getKeyValue())); + + assertEquals(true, scanner.seekBefore(toKV("h", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + assertEquals(true, scanner.seekBefore(toKV("i", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + assertEquals(true, scanner.seekBefore(toKV("j", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + assertEquals(true, scanner.seekBefore(toKV("k", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + assertEquals(true, scanner.seekBefore(toKV("l", tagUsage))); + assertEquals("k", toRowStr(scanner.getKeyValue())); + + reader.close(); + } + + /** Runs testSeekBeforeWithReSeekToInternals once for each of TagUsage.NO_TAG, ONLY_TAG and PARTIAL_TAG. */ + public void testSeekBeforeWithReSeekTo() throws Exception { + testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG); +
testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG); + testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG); + } + + /** Alternates seekBefore and reseekTo on the file built by makeNewFile(tagUsage): seekBefore must return false for "a", "b" and "c"; for "d" through "l" it must return true and land on the asserted earlier row, after which each reseekTo must return 0 and leave the scanner on the requested row. Closes the reader when done, as the other *Internals tests do. */ + protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); + HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf); + reader.loadFileInfo(); + HFileScanner scanner = reader.getScanner(false, true); + assertEquals(false, scanner.seekBefore(toKV("a", tagUsage))); + assertEquals(false, scanner.seekBefore(toKV("b", tagUsage))); + assertEquals(false, scanner.seekBefore(toKV("c", tagUsage))); + + // seekBefore d, so the scanner points to c + assertEquals(true, scanner.seekBefore(toKV("d", tagUsage))); + assertEquals("c", toRowStr(scanner.getKeyValue())); + // reseekTo c and g + assertEquals(0, scanner.reseekTo(toKV("c", tagUsage))); + assertEquals("c", toRowStr(scanner.getKeyValue())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + + // seekBefore e, so the scanner points to c + assertEquals(true, scanner.seekBefore(toKV("e", tagUsage))); + assertEquals("c", toRowStr(scanner.getKeyValue())); + // reseekTo e and g + assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); + assertEquals("e", toRowStr(scanner.getKeyValue())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + + // seekBefore f, so the scanner points to e + assertEquals(true, scanner.seekBefore(toKV("f", tagUsage))); + assertEquals("e", toRowStr(scanner.getKeyValue())); + // reseekTo e and g + assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); + assertEquals("e", toRowStr(scanner.getKeyValue())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + + // seekBefore g, so the scanner points to e + assertEquals(true, scanner.seekBefore(toKV("g", tagUsage))); + assertEquals("e",
toRowStr(scanner.getKeyValue())); + // reseekTo e and g again + assertEquals(0, scanner.reseekTo(toKV("e", tagUsage))); + assertEquals("e", toRowStr(scanner.getKeyValue())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + + // seekBefore h, so the scanner points to g + assertEquals(true, scanner.seekBefore(toKV("h", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + // reseekTo g + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + + // seekBefore i, so the scanner points to g + assertEquals(true, scanner.seekBefore(toKV("i", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + // reseekTo g + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage))); + assertEquals("g", toRowStr(scanner.getKeyValue())); + + // seekBefore j, so the scanner points to i + assertEquals(true, scanner.seekBefore(toKV("j", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + // reseekTo i + assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + + // seekBefore k, so the scanner points to i + assertEquals(true, scanner.seekBefore(toKV("k", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + // reseekTo i and k + assertEquals(0, scanner.reseekTo(toKV("i", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); + assertEquals("k", toRowStr(scanner.getKeyValue())); + + // seekBefore l, so the scanner points to k + assertEquals(true, scanner.seekBefore(toKV("l", tagUsage))); + assertEquals("k", toRowStr(scanner.getKeyValue())); + // reseekTo k + assertEquals(0, scanner.reseekTo(toKV("k", tagUsage))); + assertEquals("k", toRowStr(scanner.getKeyValue())); + + reader.close(); + } + + /** Runs testSeekToInternals once for each of TagUsage.NO_TAG, ONLY_TAG and PARTIAL_TAG. */ + public void testSeekTo() throws Exception { + testSeekToInternals(TagUsage.NO_TAG); +
testSeekToInternals(TagUsage.ONLY_TAG); + testSeekToInternals(TagUsage.PARTIAL_TAG); + } + + /** Opens the file built by makeNewFile(tagUsage), asserts that its data block index has 2 root blocks, then checks seekTo: -1 for "a"; 1 for "d" with the scanner on "c"; -2 for "h" with the scanner on "i"; 1 for "l" with the scanner on "k". Closes the reader when done. */ + protected void testSeekToInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); + HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf); + reader.loadFileInfo(); + assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount()); + HFileScanner scanner = reader.getScanner(false, true); + // lies before the start of the file. + assertEquals(-1, scanner.seekTo(toKV("a", tagUsage))); + + assertEquals(1, scanner.seekTo(toKV("d", tagUsage))); + assertEquals("c", toRowStr(scanner.getKeyValue())); + + // Across a block boundary now. + // h goes to the next block + assertEquals(-2, scanner.seekTo(toKV("h", tagUsage))); + assertEquals("i", toRowStr(scanner.getKeyValue())); + + assertEquals(1, scanner.seekTo(toKV("l", tagUsage))); + assertEquals("k", toRowStr(scanner.getKeyValue())); + + reader.close(); + } + /** Runs testBlockContainingKeyInternals once for each of TagUsage.NO_TAG, ONLY_TAG and PARTIAL_TAG. */ + public void testBlockContainingKey() throws Exception { + testBlockContainingKeyInternals(TagUsage.NO_TAG); + testBlockContainingKeyInternals(TagUsage.ONLY_TAG); + testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG); + } + + /** Opens the file built by makeNewFile(tagUsage), prints its data block index reader, and checks rootBlockContainingKey: -1 for "a"; 0 for "c", "d", "e" and "g"; 1 for "h", "i", "j", "k" and "l". Closes the reader when done. */ + protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); + HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf); + reader.loadFileInfo(); + HFileBlockIndex.BlockIndexReader blockIndexReader = + reader.getDataBlockIndexReader(); + System.out.println(blockIndexReader.toString()); + int klen = toKV("a", tagUsage).getKey().length; + // falls before the start of the file.
+ assertEquals(-1, blockIndexReader.rootBlockContainingKey( + toKV("a", tagUsage))); + assertEquals(0, blockIndexReader.rootBlockContainingKey( + toKV("c", tagUsage))); + assertEquals(0, blockIndexReader.rootBlockContainingKey( + toKV("d", tagUsage))); + assertEquals(0, blockIndexReader.rootBlockContainingKey( + toKV("e", tagUsage))); + assertEquals(0, blockIndexReader.rootBlockContainingKey( + toKV("g", tagUsage))); + assertEquals(1, blockIndexReader.rootBlockContainingKey( + toKV("h", tagUsage))); + assertEquals(1, blockIndexReader.rootBlockContainingKey( + toKV("i", tagUsage))); + assertEquals(1, blockIndexReader.rootBlockContainingKey( + toKV("j", tagUsage))); + assertEquals(1, blockIndexReader.rootBlockContainingKey( + toKV("k", tagUsage))); + assertEquals(1, blockIndexReader.rootBlockContainingKey( + toKV("l", tagUsage))); + + reader.close(); + } + + +} +