From 8252e38572d6fcc1dee1ac1249aec82c7917357a Mon Sep 17 00:00:00 2001 From: Walter Koetke Date: Tue, 29 Mar 2016 07:38:03 -0700 Subject: [PATCH] HBASE-15480: added api for bloom check for array of keys, to avoid overhead of calling for one key at a time. --- .../org/apache/hadoop/hbase/util/BloomFilter.java | 13 ++++ .../apache/hadoop/hbase/util/ByteBloomFilter.java | 12 +++ .../hadoop/hbase/util/CompoundBloomFilter.java | 91 +++++++++++++++------- 3 files changed, 88 insertions(+), 28 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilter.java index 163852c..a4e1297 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/BloomFilter.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util; import java.nio.ByteBuffer; +import java.util.BitSet; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -56,6 +57,18 @@ public interface BloomFilter extends BloomFilterBase { boolean contains(byte [] buf, int offset, int length, ByteBuffer bloom); /** + * Check if the specified keys are contained in the bloom filter. + * + * @param buf array of data (keys) to check for existence of + * @param offset array of offsets into the data + * @param length array of lengths of the data + * @param bloom bloom filter data to search. This can be null if auto-loading + * is supported. 
+ * @return a BitSet holding the bloom check results for the keys (true if matched by bloom, false if not) + */ + BitSet contains(byte [][] buf, int[] offset, int[] length, ByteBuffer bloom); + + /** * @return true if this Bloom filter can automatically load its data * and thus allows a null byte buffer to be passed to contains() */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java index 56c3776..db788e2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBloomFilter.java @@ -24,6 +24,7 @@ import java.io.DataOutput; import java.io.IOException; import java.nio.ByteBuffer; import java.text.NumberFormat; +import java.util.BitSet; import java.util.Random; import org.apache.hadoop.hbase.classification.InterfaceAudience; @@ -401,6 +402,17 @@ public class ByteBloomFilter implements BloomFilter, BloomFilterWriter { } @Override + public BitSet contains(byte[][] buf, int[] offset, int[] length, ByteBuffer bloom) { + // Unlike the CompoundBloomFilter implementation, this does not optimize block cache access; + // this method is here for backward compatibility + BitSet bitSet = new BitSet(buf.length); + for (int i =0;i= 0) { - // Update statistics. Only used in unit tests. - ++numQueriesPerChunk[block]; - if (result) - ++numPositivesPerChunk[block]; + HFileBlock bloomBlock = getHFileBlock(block,key,keyOffset,keyLength); + result = bloomContains(bloomBlock,key,keyOffset,keyLength); } - + updateTestingStats(block,result); return result; } @@ -176,4 +182,33 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase return sb.toString(); } + private HFileBlock getHFileBlock(int block, byte[] key, int keyOffset, int keyLength) { + try { + // We cache the block and use a positional read. 
+ return reader.readBlock(index.getRootBlockOffset(block), + index.getRootBlockDataSize(block), true, true, false, true, + BlockType.BLOOM_CHUNK, null); + } catch (IOException ex) { + // The Bloom filter is broken, turn it off. + throw new IllegalArgumentException( + "Failed to load Bloom block for key " + + Bytes.toStringBinary(key, keyOffset, keyLength), ex); + } + } + + private void updateTestingStats(int block, boolean result) { + if (numQueriesPerChunk != null && block >= 0) { + ++numQueriesPerChunk[block]; + if (result) + ++numPositivesPerChunk[block]; + } + } + + private boolean bloomContains(HFileBlock bloomBlock,byte[] key, int keyOffset,int keyLength) { + ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly(); + return ByteBloomFilter.contains(key, keyOffset, keyLength, + bloomBuf, bloomBlock.headerSize(), + bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); + } + } -- 2.3.2 (Apple Git-55)