diff --git storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java index 5b1914d..3b44d2b 100644 --- storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java +++ storage-api/src/java/org/apache/hive/common/util/BloomKFilter.java @@ -36,8 +36,6 @@ * This implementation has much lesser L1 data cache misses than {@link BloomFilter}. */ public class BloomKFilter { - private final byte[] BYTE_ARRAY_4 = new byte[4]; - private final byte[] BYTE_ARRAY_8 = new byte[8]; public static final float DEFAULT_FPP = 0.05f; private static final int DEFAULT_BLOCK_SIZE = 8; private static final int DEFAULT_BLOCK_SIZE_BITS = (int) (Math.log(DEFAULT_BLOCK_SIZE) / Math.log(2)); @@ -149,8 +147,7 @@ public void addByte(byte val) { } public void addInt(int val) { - // puts int in little endian order - addBytes(intToByteArrayLE(val)); + addHash(Murmur3.hash64(val)); } @@ -184,6 +181,7 @@ public boolean testBytes(byte[] val, int offset, int length) { private boolean testHash(long hash64) { final int hash1 = (int) hash64; final int hash2 = (int) (hash64 >>> 32); + final long[] bits = bitSet.data; int firstHash = hash1 + hash2; // hashcode should be positive, flip all the bits if it's negative @@ -216,7 +214,7 @@ private boolean testHash(long hash64) { long expected = 0; for (int i = 0; i < DEFAULT_BLOCK_SIZE; i++) { final long mask = masks[i]; - expected |= (bitSet.data[blockBaseOffset + i] & mask) ^ mask; + expected |= (bits[blockBaseOffset + i] & mask) ^ mask; } // clear the mask for array reuse (this is to avoid masks array allocation in inner loop) @@ -235,7 +233,7 @@ public boolean testByte(byte val) { } public boolean testInt(int val) { - return testBytes(intToByteArrayLE(val)); + return testHash(Murmur3.hash64(val)); } public boolean testLong(long val) { @@ -250,26 +248,6 @@ public boolean testDouble(double val) { return testLong(Double.doubleToLongBits(val)); } - private byte[] intToByteArrayLE(int val) { - BYTE_ARRAY_4[0] = (byte) (val >> 0); - BYTE_ARRAY_4[1] = (byte) (val >> 8); - BYTE_ARRAY_4[2] = (byte) (val >> 16); - BYTE_ARRAY_4[3] = (byte) (val >> 24); - return BYTE_ARRAY_4; - } - - private byte[] longToByteArrayLE(long val) { - BYTE_ARRAY_8[0] = (byte) (val >> 0); - BYTE_ARRAY_8[1] = (byte) (val >> 8); - BYTE_ARRAY_8[2] = (byte) (val >> 16); - BYTE_ARRAY_8[3] = (byte) (val >> 24); - BYTE_ARRAY_8[4] = (byte) (val >> 32); - BYTE_ARRAY_8[5] = (byte) (val >> 40); - BYTE_ARRAY_8[6] = (byte) (val >> 48); - BYTE_ARRAY_8[7] = (byte) (val >> 56); - return BYTE_ARRAY_8; - } - public long sizeInBytes() { return getBitSize() / 8; }