diff --git common/src/java/org/apache/hive/common/util/HashCodeUtil.java common/src/java/org/apache/hive/common/util/HashCodeUtil.java index 700b2e14f0..1330cbeb6f 100644 --- common/src/java/org/apache/hive/common/util/HashCodeUtil.java +++ common/src/java/org/apache/hive/common/util/HashCodeUtil.java @@ -33,15 +33,12 @@ public static int calculateIntHashCode(int key) { return key; } + public static int calculateTwoLongHashCode(long l0, long l1) { + return Murmur3.hash32(l0, l1); + } + public static int calculateLongHashCode(long key) { - // Mixing down into the lower bits - this produces a worse hashcode in purely - // numeric terms, but leaving entropy in the higher bits is not useful for a - // 2^n bucketing scheme. See JSR166 ConcurrentHashMap r1.89 (released under Public Domain) - // Note: ConcurrentHashMap has since reverted this to retain entropy bits higher - // up, to support the 2-level hashing for segment which operates at a higher bitmask - key ^= (key >>> 7) ^ (key >>> 4); - key ^= (key >>> 20) ^ (key >>> 12); - return (int) key; + return Murmur3.hash32(key); } public static void calculateLongArrayHashCodes(long[] longs, int[] hashCodes, final int count) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java index 165272887e..c40c35da2e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java @@ -44,9 +44,7 @@ public void setHashKey() { HashCodeUtil.calculateLongHashCode(longValue0) ^ null1Hashcode)); } else { - hashcode = - HashCodeUtil.calculateLongHashCode(longValue0) >>> 16 ^ - HashCodeUtil.calculateLongHashCode(longValue1); + hashcode = HashCodeUtil.calculateTwoLongHashCode(longValue0, longValue1); } } diff --git 
storage-api/src/java/org/apache/hive/common/util/Murmur3.java storage-api/src/java/org/apache/hive/common/util/Murmur3.java index 8aae28b9c4..85db95c08d 100644 --- storage-api/src/java/org/apache/hive/common/util/Murmur3.java +++ storage-api/src/java/org/apache/hive/common/util/Murmur3.java @@ -54,6 +54,43 @@ public static final int DEFAULT_SEED = 104729; + public static int hash32(long l0, long l1) { + return hash32(l0, l1, DEFAULT_SEED); + } + + public static int hash32(long l0) { + return hash32(l0, DEFAULT_SEED); + } + + /** + * Murmur3 32-bit hash of a single long: mixes the low then high 32 bits of Long.reverseBytes(l0) into seed and finalizes with length Long.BYTES. + */ + public static int hash32(long l0, int seed) { + int hash = seed; + final long r0 = Long.reverseBytes(l0); + + hash = mix32((int) r0, hash); + hash = mix32((int) (r0 >>> 32), hash); + + return fmix32(Long.BYTES, hash); + } + + /** + * Murmur3 32-bit hash of two longs: mixes the low then high 32 bits of Long.reverseBytes(l0), then of Long.reverseBytes(l1), into seed and finalizes with length twice Long.BYTES. + */ + public static int hash32(long l0, long l1, int seed) { + int hash = seed; + final long r0 = Long.reverseBytes(l0); + final long r1 = Long.reverseBytes(l1); + + hash = mix32((int) r0, hash); + hash = mix32((int) (r0 >>> 32), hash); + hash = mix32((int) (r1), hash); + hash = mix32((int) (r1 >>> 32), hash); + + return fmix32(Long.BYTES * 2, hash); + } + /** * Murmur3 32-bit variant. 
* @@ -108,12 +145,7 @@ public static int hash32(byte[] data, int offset, int length, int seed) { | ((data[offset + i_4 + 2] & 0xff) << 16) | ((data[offset + i_4 + 3] & 0xff) << 24); - // mix functions - k *= C1_32; - k = Integer.rotateLeft(k, R1_32); - k *= C2_32; - hash ^= k; - hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + hash = mix32(k, hash); } // tail @@ -134,7 +166,18 @@ public static int hash32(byte[] data, int offset, int length, int seed) { hash ^= k1; } - // finalization + return fmix32(length, hash); + } + + private static int mix32(int k, int hash) { + k *= C1_32; + k = Integer.rotateLeft(k, R1_32); + k *= C2_32; + hash ^= k; + return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; + } + + private static int fmix32(int length, int hash) { hash ^= length; hash ^= (hash >>> 16); hash *= 0x85ebca6b; diff --git storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java index 16955c11a3..7320d6d56a 100644 --- storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java +++ storage-api/src/test/org/apache/hive/common/util/TestMurmur3.java @@ -270,4 +270,51 @@ public void testIncremental() { assertEquals("Block size " + blockSize, expected, diff.end()); } } + + @Test + public void testTwoLongOrdered() { + ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES * 2); + for (long i = 0; i < 1000; i++) { + for (long j = 0; j < 1000; j++) { + buffer.putLong(0, i); + buffer.putLong(Long.BYTES, j); + assertEquals(Murmur3.hash32(buffer.array()), Murmur3.hash32(i, j)); + } + } + } + + @Test + public void testTwoLongRandom() { + ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES * 2); + Random random = new Random(123L); + for (long i = 0; i < 1000; i++) { + for (long j = 0; j < 1000; j++) { + long x = random.nextLong(); + long y = random.nextLong(); + buffer.putLong(0, x); + buffer.putLong(Long.BYTES, y); + assertEquals(Murmur3.hash32(buffer.array()), Murmur3.hash32(x, y)); + } + } + } + + 
@Test + public void testSingleLongOrdered() { + ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES); + for (long i = 0; i < 1000; i++) { + buffer.putLong(0, i); + assertEquals(Murmur3.hash32(buffer.array()), Murmur3.hash32(i)); + } + } + + @Test + public void testSingleLongRandom() { + ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES); + Random random = new Random(123L); + for (long i = 0; i < 1000; i++) { + long x = random.nextLong(); + buffer.putLong(0, x); + assertEquals(Murmur3.hash32(buffer.array()), Murmur3.hash32(x)); + } + } }