diff --git common/src/java/org/apache/hive/common/util/HashCodeUtil.java common/src/java/org/apache/hive/common/util/HashCodeUtil.java
index 700b2e14f0..327e839391 100644
--- common/src/java/org/apache/hive/common/util/HashCodeUtil.java
+++ common/src/java/org/apache/hive/common/util/HashCodeUtil.java
@@ -23,6 +23,17 @@
  */
 public class HashCodeUtil {
 
+  // Constants for the Murmur3 32-bit variant
+  private static final int C1_32 = 0xcc9e2d51;
+  private static final int C2_32 = 0x1b873593;
+  private static final int R1_32 = 15;
+  private static final int R2_32 = 13;
+  private static final int M_32 = 5;
+  private static final int N_32 = 0xe6546b64;
+
+  // Seed under which results agree with Murmur3.hash32(byte[]); see TestHashCodeUtil
+  private static final int DEFAULT_SEED = 104729;
+
   public static int calculateIntHashCode(int key) {
     key = ~key + (key << 15); // key = (key << 15) - key - 1;
     key = key ^ (key >>> 12);
@@ -33,15 +44,25 @@ public static int calculateIntHashCode(int key) {
     return key;
   }
 
+  /**
+   * Hashes a pair of longs with the Murmur3 32-bit algorithm.
+   *
+   * @param l0 first long of the key
+   * @param l1 second long of the key
+   * @return the value Murmur3.hash32 yields for the 16 big-endian bytes of (l0, l1)
+   */
+  public static int calculateTwoLongHashCode(long l0, long l1) {
+    return murmur32(l0, l1, DEFAULT_SEED);
+  }
+
+  /**
+   * Hashes a single long with the Murmur3 32-bit algorithm.
+   *
+   * @param key the value to hash
+   * @return the value Murmur3.hash32 yields for the 8 big-endian bytes of key
+   */
   public static int calculateLongHashCode(long key) {
-    // Mixing down into the lower bits - this produces a worse hashcode in purely
-    // numeric terms, but leaving entropy in the higher bits is not useful for a
-    // 2^n bucketing scheme. See JSR166 ConcurrentHashMap r1.89 (released under Public Domain)
-    // Note: ConcurrentHashMap has since reverted this to retain entropy bits higher
-    // up, to support the 2-level hashing for segment which operates at a higher bitmask
-    key ^= (key >>> 7) ^ (key >>> 4);
-    key ^= (key >>> 20) ^ (key >>> 12);
-    return (int) key;
+    return murmur32(key, DEFAULT_SEED);
   }
 
   public static void calculateLongArrayHashCodes(long[] longs, int[] hashCodes, final int count) {
@@ -115,4 +136,62 @@ public static int murmurHash(byte[] data, int offset, int length) {
 
     return h;
   }
+
+  /** Mixes one 32-bit block {@code k} into the running {@code hash}. */
+  private static int murmur32Body(int k, int hash) {
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+  }
+
+  /**
+   * Murmur3 finalization step.
+   *
+   * @param hash the hash after every block has been mixed in
+   * @param length number of key bytes that were hashed; it is folded into the result
+   * @return the finalized 32-bit hash
+   */
+  private static int murmur32Finalize(int hash, int length) {
+    hash ^= length;
+    hash ^= (hash >>> 16);
+    hash *= 0x85ebca6b;
+    hash ^= (hash >>> 13);
+    hash *= 0xc2b2ae35;
+    hash ^= (hash >>> 16);
+    return hash;
+  }
+
+  /**
+   * Murmur3 32-bit variant for a 16 byte key made of two longs.
+   * @see Murmur3#hash32(byte[], int, int, int)
+   */
+  private static int murmur32(long l0, long l1, int seed) {
+    int hash = seed;
+    // Byte-reverse so the 32-bit blocks match a big-endian buffer of the key (see test)
+    final long r0 = Long.reverseBytes(l0);
+    final long r1 = Long.reverseBytes(l1);
+
+    hash = murmur32Body((int) r0, hash);
+    hash = murmur32Body((int) (r0 >>> 32), hash);
+    hash = murmur32Body((int) (r1), hash);
+    hash = murmur32Body((int) (r1 >>> 32), hash);
+
+    return murmur32Finalize(hash, 2 * Long.BYTES);
+  }
+
+  /**
+   * Murmur3 32-bit variant for an 8 byte key made of one long.
+   * @see Murmur3#hash32(byte[], int, int, int)
+   */
+  private static int murmur32(long l0, int seed) {
+    int hash = seed;
+    final long r0 = Long.reverseBytes(l0);
+
+    hash = murmur32Body((int) r0, hash);
+    hash = murmur32Body((int) (r0 >>> 32), hash);
+
+    return murmur32Finalize(hash, Long.BYTES);
+  }
 }
\ No newline at end of file
diff --git common/src/test/org/apache/hive/common/util/TestHashCodeUtil.java common/src/test/org/apache/hive/common/util/TestHashCodeUtil.java
new file mode 100644
index 0000000000..43e48899d4
--- /dev/null
+++ common/src/test/org/apache/hive/common/util/TestHashCodeUtil.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import org.junit.Test;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestHashCodeUtil {
+  @Test
+  public void testOrdered() {
+    ByteBuffer buffer = ByteBuffer.allocate(16);
+    for (int i = 0; i < 1000; i++) {
+      for (int j = 0; j < 1000; j++) {
+        buffer.putLong(0, i);
+        buffer.putLong(8, j);
+        assertEquals(Murmur3.hash32(buffer.array()), HashCodeUtil.calculateTwoLongHashCode(i, j));
+      }
+    }
+  }
+
+  @Test
+  public void testRandom() {
+    ByteBuffer buffer = ByteBuffer.allocate(16);
+    Random random = new Random();
+    for (int i = 0; i < 1000; i++) {
+      for (int j = 0; j < 1000; j++) {
+        long x = random.nextLong();
+        long y = random.nextLong();
+        buffer.putLong(0, x);
+        buffer.putLong(8, y);
+        assertEquals(Murmur3.hash32(buffer.array()), HashCodeUtil.calculateTwoLongHashCode(x, y));
+      }
+    }
+  }
+
+  @Test
+  public void testSingleLong() {
+    ByteBuffer buffer = ByteBuffer.allocate(8);
+    Random random = new Random();
+    for (int i = 0; i < 1000; i++) {
+      long x = random.nextLong();
+      buffer.putLong(0, x);
+      assertEquals("key=" + x, Murmur3.hash32(buffer.array()), HashCodeUtil.calculateLongHashCode(x));
+    }
+  }
+}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
index 165272887e..c40c35da2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
@@ -44,9 +44,7 @@ public void 
setHashKey() { HashCodeUtil.calculateLongHashCode(longValue0) ^ null1Hashcode)); } else { - hashcode = - HashCodeUtil.calculateLongHashCode(longValue0) >>> 16 ^ - HashCodeUtil.calculateLongHashCode(longValue1); + hashcode = HashCodeUtil.calculateTwoLongHashCode(longValue0, longValue1); } }