.../java/org/apache/hadoop/hbase/util/Bytes.java | 40 +++++++++++++++++++++ .../hadoop/hbase/io/hfile/FixedFileTrailer.java | 42 +++++++++++----------- .../hadoop/hbase/regionserver/StoreFile.java | 41 ++++++++++++++------- .../apache/hadoop/hbase/util/BloomFilterBase.java | 5 --- .../hadoop/hbase/util/BloomFilterFactory.java | 4 +-- .../apache/hadoop/hbase/util/ByteBloomFilter.java | 6 ---- .../hadoop/hbase/util/CompoundBloomFilter.java | 18 +++++----- .../hadoop/hbase/util/CompoundBloomFilterBase.java | 6 ---- .../hbase/util/CompoundBloomFilterWriter.java | 8 +++-- 9 files changed, 109 insertions(+), 61 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 21b0a99..739ae37 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -2045,6 +2045,23 @@ public class Bytes implements Comparable { } /** + * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR + * + * @param arr array of byte arrays to search for + * @param key the key you want to find + * @param offset the offset in the key you want to find + * @param length the length of the key + * @return zero-based index of the key, if the key is present in the array. + * Otherwise, a value -(i + 1) such that the key is between arr[i - + * 1] and arr[i] non-inclusively, where i is in [0, i], if we define + * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above + * means that this function can return 2N + 1 different values + * ranging from -(N + 1) to N - 1. + */ + public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) { + return binarySearch(arr, key, offset, length, Bytes.BYTES_RAWCOMPARATOR); + } + /** * Binary search for keys in indexes. * * @param arr array of byte arrays to search for @@ -2059,8 +2076,12 @@ public class Bytes implements Comparable { * means that this function can return 2N + 1 different values * ranging from -(N + 1) to N - 1. */ + @Deprecated public static int binarySearch(byte [][]arr, byte []key, int offset, int length, RawComparator comparator) { + if(comparator == null) { + comparator = Bytes.BYTES_RAWCOMPARATOR; + } int low = 0; int high = arr.length - 1; @@ -2097,7 +2118,26 @@ public class Bytes implements Comparable { * ranging from -(N + 1) to N - 1. * @return the index of the block */ + @Deprecated public static int binarySearch(byte[][] arr, Cell key, RawComparator comparator) { + return binarySearch(arr, key, (Comparator)comparator); + } + + /** + * Binary search for keys in indexes. + * + * @param arr array of byte arrays to search for + * @param key the key you want to find + * @param comparator a comparator to compare. + * @return zero-based index of the key, if the key is present in the array. + * Otherwise, a value -(i + 1) such that the key is between arr[i - + * 1] and arr[i] non-inclusively, where i is in [0, i], if we define + * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above + * means that this function can return 2N + 1 different values + * ranging from -(N + 1) to N - 1. + * @return the index of the block + */ + public static int binarySearch(byte[][] arr, Cell key, Comparator comparator) { int low = 0; int high = arr.length - 1; KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java index 3dcfc9b..40ab181 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.protobuf.generated.HFileProtos; import org.apache.hadoop.hbase.util.Bytes; + /** * The {@link HFile} has a fixed trailer which contains offsets to other * variable parts of the file. Also includes basic metadata on this file. The @@ -107,7 +108,8 @@ public class FixedFileTrailer { private long lastDataBlockOffset; /** Raw key comparator class name in version 3 */ - private String comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName(); + // We could write the actual class name from 2.0 onwards and handle BC + private String comparatorClassName = KeyValue.COMPARATOR.getClass().getName(); /** The encryption key */ private byte[] encryptionKey; @@ -200,7 +202,7 @@ public class FixedFileTrailer { .setNumDataIndexLevels(numDataIndexLevels) .setFirstDataBlockOffset(firstDataBlockOffset) .setLastDataBlockOffset(lastDataBlockOffset) - // TODO this is a classname encoded into an HFile's trailer. We are going to need to have + // TODO this is a classname encoded into an HFile's trailer. We are going to need to have // some compat code here. .setComparatorClassName(comparatorClassName) .setCompressionCodec(compressionCodec.ordinal()); @@ -539,25 +541,17 @@ public class FixedFileTrailer { public void setComparatorClass(Class klass) { // Is the comparator instantiable? try { - KVComparator comp = klass.newInstance(); - - // HFile V2 legacy comparator class names. - if (KeyValue.COMPARATOR.getClass().equals(klass)) { - comparatorClassName = KeyValue.COMPARATOR.getLegacyKeyComparatorName(); - } else if (KeyValue.META_COMPARATOR.getClass().equals(klass)) { - comparatorClassName = KeyValue.META_COMPARATOR.getLegacyKeyComparatorName(); - } else if (KeyValue.RAW_COMPARATOR.getClass().equals(klass)) { - comparatorClassName = KeyValue.RAW_COMPARATOR.getLegacyKeyComparatorName(); - } else { - // if the name wasn't one of the legacy names, maybe its a legit new kind of comparator. + // If null, it should be the Bytes.BYTES_RAWCOMPARATOR + if (klass != null) { + KVComparator comp = klass.newInstance(); + // if the name wasn't one of the legacy names, maybe its a legit new + // kind of comparator. comparatorClassName = klass.getName(); } } catch (Exception e) { - throw new RuntimeException("Comparator class " + klass.getName() + - " is not instantiable", e); + throw new RuntimeException("Comparator class " + klass.getName() + " is not instantiable", e); } - } @SuppressWarnings("unchecked") @@ -574,9 +568,12 @@ public class FixedFileTrailer { } // if the name wasn't one of the legacy names, maybe its a legit new kind of comparator. - - return (Class) - Class.forName(comparatorClassName); + if (comparatorClassName.equals(KeyValue.RAW_COMPARATOR.getClass().getName())) { + // Return null for Bytes.BYTES_RAWCOMPARATOR + return null; + } else { + return (Class) Class.forName(comparatorClassName); + } } catch (ClassNotFoundException ex) { throw new IOException(ex); } @@ -585,7 +582,12 @@ public class FixedFileTrailer { public static KVComparator createComparator( String comparatorClassName) throws IOException { try { - return getComparatorClass(comparatorClassName).newInstance(); + Class comparatorClass = getComparatorClass(comparatorClassName); + if (comparatorClass != null) { + return comparatorClass.newInstance(); + } else { + return null; + } } catch (InstantiationException e) { throw new IOException("Comparator class " + comparatorClassName + " is not instantiable", e); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index 345dd9b..954db6a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -884,15 +884,22 @@ public class StoreFile { " (ROW or ROWCOL expected)"); } generalBloomFilterWriter.add(bloomKey, bloomKeyOffset, bloomKeyLen); - if (lastBloomKey != null - && generalBloomFilterWriter.getComparator().compareFlatKey(bloomKey, - bloomKeyOffset, bloomKeyLen, lastBloomKey, - lastBloomKeyOffset, lastBloomKeyLen) <= 0) { - throw new IOException("Non-increasing Bloom keys: " - + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen) - + " after " - + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset, - lastBloomKeyLen)); + if (lastBloomKey != null) { + int res = 0; + // hbase:meta does not have blooms. So we need not have special interpretation + // of the hbase:meta cells. We could use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom + if (bloomType == BloomType.ROW) { + res = Bytes.BYTES_RAWCOMPARATOR.compare(bloomKey, bloomKeyOffset, bloomKeyLen, + lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen); + } else { + res = KeyValue.COMPARATOR.compareFlatKey(bloomKey, + bloomKeyOffset, bloomKeyLen, lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen); + } + if (res <= 0) { + throw new IOException("Non-increasing Bloom keys: " + + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen) + " after " + + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen)); + } } lastBloomKey = bloomKey; lastBloomKeyOffset = bloomKeyOffset; @@ -913,6 +920,8 @@ public class StoreFile { if (null != this.deleteFamilyBloomFilterWriter) { boolean newKey = true; if (lastDeleteFamilyCell != null) { + // hbase:meta does not have blooms. So we need not have special interpretation + // of the hbase:meta cells newKey = !kvComparator.matchingRows(cell, lastDeleteFamilyCell); } if (newKey) { @@ -1280,8 +1289,16 @@ public class StoreFile { // Whether the primary Bloom key is greater than the last Bloom key // from the file info. For row-column Bloom filters this is not yet // a sufficient condition to return false. - boolean keyIsAfterLast = lastBloomKey != null - && bloomFilter.getComparator().compareFlatKey(key, lastBloomKey) > 0; + boolean keyIsAfterLast = (lastBloomKey != null); + // hbase:meta does not have blooms. So we need not have special interpretation + // of the hbase:meta cells. We could use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom + if (keyIsAfterLast) { + if (bloomFilterType == BloomType.ROW) { + keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0); + } else { + keyIsAfterLast = (KeyValue.COMPARATOR.compareFlatKey(key, lastBloomKey)) > 0; + } + } if (bloomFilterType == BloomType.ROWCOL) { // Since a Row Delete is essentially a DeleteFamily applied to all @@ -1292,7 +1309,7 @@ public class StoreFile { null, 0, 0); if (keyIsAfterLast - && bloomFilter.getComparator().compareFlatKey(rowBloomKey, + && KeyValue.COMPARATOR.compareFlatKey(rowBloomKey, lastBloomKey) > 0) { exists = false; } else { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java index 3b9ca9a..8198726 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterBase.java @@ -49,9 +49,4 @@ public interface BloomFilterBase { byte[] createBloomKey(byte[] rowBuf, int rowOffset, int rowLen, byte[] qualBuf, int qualOffset, int qualLen); - /** - * @return Bloom key comparator - */ - KVComparator getComparator(); - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java index 5ff7207..71908ab 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilterFactory.java @@ -199,7 +199,7 @@ public final class BloomFilterFactory { // In case of compound Bloom filters we ignore the maxKeys hint. CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), - bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : KeyValue.RAW_COMPARATOR); + bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : null); writer.addInlineBlockWriter(bloomWriter); return bloomWriter; } @@ -230,7 +230,7 @@ public final class BloomFilterFactory { // In case of compound Bloom filters we ignore the maxKeys hint. CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), - KeyValue.RAW_COMPARATOR); + null); writer.addInlineBlockWriter(bloomWriter); return bloomWriter; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java index 56c3776..4576448 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java @@ -625,12 +625,6 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter { return result; } - @Override - public KVComparator getComparator() { -// return Bytes.BYTES_RAWCOMPARATOR; - return KeyValue.RAW_COMPARATOR; - } - /** * A human-readable string with statistics for the given Bloom filter. * diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java index beda805..e8f0414 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java @@ -70,8 +70,13 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase totalKeyCount = meta.readLong(); totalMaxKeys = meta.readLong(); numChunks = meta.readInt(); - comparator = FixedFileTrailer.createComparator( - Bytes.toString(Bytes.readByteArray(meta))); + byte[] comparatorClassName = Bytes.readByteArray(meta); + // The writer would have return 0 as the vint length for the case of + // Bytes.BYTES_RAWCOMPARATOR. In such cases do not initialize comparator, it can be + // null + if (comparatorClassName.length != 0) { + comparator = FixedFileTrailer.createComparator(Bytes.toString(comparatorClassName)); + } hash = Hash.getInstance(hashType); if (hash == null) { @@ -131,11 +136,6 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase return numChunks; } - @Override - public KVComparator getComparator() { - return comparator; - } - public void enableTestingStats() { numQueriesPerChunk = new long[numChunks]; numPositivesPerChunk = new long[numChunks]; @@ -172,7 +172,9 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase sb.append(ByteBloomFilter.STATS_RECORD_SEP + "Number of chunks: " + numChunks); sb.append(ByteBloomFilter.STATS_RECORD_SEP + - "Comparator: " + comparator.getClass().getSimpleName()); + ((comparator != null) ? "Comparator: " + + comparator.getClass().getSimpleName() : "Comparator: " + + Bytes.BYTES_RAWCOMPARATOR.getClass().getSimpleName())); return sb.toString(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java index af9fa00..0ba46e6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterBase.java @@ -50,7 +50,6 @@ public class CompoundBloomFilterBase implements BloomFilterBase { /** Hash function type to use, as defined in {@link Hash} */ protected int hashType; - /** Comparator used to compare Bloom filter keys */ protected KVComparator comparator; @@ -89,9 +88,4 @@ public class CompoundBloomFilterBase implements BloomFilterBase { return kv.getKey(); } - @Override - public KVComparator getComparator() { - return comparator; - } - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java index d436a98..1cef523 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilterWriter.java @@ -243,8 +243,12 @@ public class CompoundBloomFilterWriter extends CompoundBloomFilterBase // Fields that don't have equivalents in ByteBloomFilter. out.writeInt(numChunks); - Bytes.writeByteArray(out, - Bytes.toBytes(comparator.getClass().getName())); + if (comparator != null) { + Bytes.writeByteArray(out, Bytes.toBytes(comparator.getClass().getName())); + } else { + // Internally writes a 0 vint if the byte[] is null + Bytes.writeByteArray(out, null); + } // Write a single-level index without compression or block header. bloomBlockIndexWriter.writeSingleLevelIndex(out, "Bloom filter");