diff --git common/src/java/org/apache/hive/common/util/HashCodeUtil.java common/src/java/org/apache/hive/common/util/HashCodeUtil.java
index 700b2e14f0..c636ca59de 100644
--- common/src/java/org/apache/hive/common/util/HashCodeUtil.java
+++ common/src/java/org/apache/hive/common/util/HashCodeUtil.java
@@ -23,6 +23,16 @@
  */
 public class HashCodeUtil {
 
+  // Mixing constants for the Murmur3 32-bit variant (multipliers, rotations, add term)
+  private static final int C1_32 = 0xcc9e2d51;
+  private static final int C2_32 = 0x1b873593;
+  private static final int R1_32 = 15;
+  private static final int R2_32 = 13;
+  private static final int M_32 = 5;
+  private static final int N_32 = 0xe6546b64;
+
+  private static final int DEFAULT_SEED = 104729;
+
   public static int calculateIntHashCode(int key) {
     key = ~key + (key << 15); // key = (key << 15) - key - 1;
     key = key ^ (key >>> 12);
@@ -32,16 +42,12 @@ public static int calculateIntHashCode(int key) {
     key = key ^ (key >>> 16);
     return key;
   }
+  public static int calculateTwoLongHashCode(long l0, long l1) {
+    return hash32(l0, l1, DEFAULT_SEED);
+  }
 
   public static int calculateLongHashCode(long key) {
-    // Mixing down into the lower bits - this produces a worse hashcode in purely
-    // numeric terms, but leaving entropy in the higher bits is not useful for a
-    // 2^n bucketing scheme. See JSR166 ConcurrentHashMap r1.89 (released under Public Domain)
-    // Note: ConcurrentHashMap has since reverted this to retain entropy bits higher
-    // up, to support the 2-level hashing for segment which operates at a higher bitmask
-    key ^= (key >>> 7) ^ (key >>> 4);
-    key ^= (key >>> 20) ^ (key >>> 12);
-    return (int) key;
+    return hash32(key, DEFAULT_SEED);
   }
 
   public static void calculateLongArrayHashCodes(long[] longs, int[] hashCodes, final int count) {
@@ -115,4 +121,138 @@ public static int murmurHash(byte[] data, int offset, int length) {
 
     return h;
   }
+
+  /**
+   * Murmur3 32-bit variant over the 16 bytes of l0 then l1, most significant byte first.
+   * @see Murmur3#hash32(byte[], int, int, int)
+   */
+  private static int hash32(long l0, long l1, int seed) {
+    int hash = seed;
+
+    final byte b0 = (byte) (l0 >> 56);
+    final byte b1 = (byte) (l0 >> 48);
+    final byte b2 = (byte) (l0 >> 40);
+    final byte b3 = (byte) (l0 >> 32);
+    final byte b4 = (byte) (l0 >> 24);
+    final byte b5 = (byte) (l0 >> 16);
+    final byte b6 = (byte) (l0 >> 8);
+    final byte b7 = (byte) (l0 >> 0);
+    final byte b8 = (byte) (l1 >> 56);
+    final byte b9 = (byte) (l1 >> 48);
+    final byte b10 = (byte) (l1 >> 40);
+    final byte b11 = (byte) (l1 >> 32);
+    final byte b12 = (byte) (l1 >> 24);
+    final byte b13 = (byte) (l1 >> 16);
+    final byte b14 = (byte) (l1 >> 8);
+    final byte b15 = (byte) (l1 >> 0);
+
+    // body: four 4-byte words, each assembled with its first byte in the low bits
+    int k;
+
+    // bytes 0-3 (upper half of l0)
+    k = (b0 & 0xff)
+        | ((b1 & 0xff) << 8)
+        | ((b2 & 0xff) << 16)
+        | ((b3 & 0xff) << 24);
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+
+    // bytes 4-7 (lower half of l0)
+    k = (b4 & 0xff)
+        | ((b5 & 0xff) << 8)
+        | ((b6 & 0xff) << 16)
+        | ((b7 & 0xff) << 24);
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+
+    // bytes 8-11 (upper half of l1)
+    k = (b8 & 0xff)
+        | ((b9 & 0xff) << 8)
+        | ((b10 & 0xff) << 16)
+        | ((b11 & 0xff) << 24);
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+
+    // bytes 12-15 (lower half of l1)
+    k = (b12 & 0xff)
+        | ((b13 & 0xff) << 8)
+        | ((b14 & 0xff) << 16)
+        | ((b15 & 0xff) << 24);
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+
+    // finalization
+    hash ^= 16; // input length in bytes: two longs
+    hash ^= (hash >>> 16);
+    hash *= 0x85ebca6b;
+    hash ^= (hash >>> 13);
+    hash *= 0xc2b2ae35;
+    hash ^= (hash >>> 16);
+
+    return hash;
+  }
+
+  /**
+   * Murmur3 32-bit variant over the 8 bytes of l0, most significant byte first.
+   * @see Murmur3#hash32(byte[], int, int, int)
+   */
+  private static int hash32(long l0, int seed) {
+    int hash = seed;
+
+    final byte b0 = (byte) (l0 >> 56);
+    final byte b1 = (byte) (l0 >> 48);
+    final byte b2 = (byte) (l0 >> 40);
+    final byte b3 = (byte) (l0 >> 32);
+    final byte b4 = (byte) (l0 >> 24);
+    final byte b5 = (byte) (l0 >> 16);
+    final byte b6 = (byte) (l0 >> 8);
+    final byte b7 = (byte) (l0 >> 0);
+
+    // body: two 4-byte words, each assembled with its first byte in the low bits
+    int k;
+
+    // bytes 0-3 (upper half of l0)
+    k = (b0 & 0xff)
+        | ((b1 & 0xff) << 8)
+        | ((b2 & 0xff) << 16)
+        | ((b3 & 0xff) << 24);
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+
+    // bytes 4-7 (lower half of l0)
+    k = (b4 & 0xff)
+        | ((b5 & 0xff) << 8)
+        | ((b6 & 0xff) << 16)
+        | ((b7 & 0xff) << 24);
+    k *= C1_32;
+    k = Integer.rotateLeft(k, R1_32);
+    k *= C2_32;
+    hash ^= k;
+    hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
+
+    // finalization
+    hash ^= 8; // input length in bytes: one long
+    hash ^= (hash >>> 16);
+    hash *= 0x85ebca6b;
+    hash ^= (hash >>> 13);
+    hash *= 0xc2b2ae35;
+    hash ^= (hash >>> 16);
+
+    return hash;
+  }
 }
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
index 165272887e..c40c35da2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java
@@ -44,9 +44,7 @@ public void setHashKey() {
                 HashCodeUtil.calculateLongHashCode(longValue0) ^
                 null1Hashcode));
     } else {
-      hashcode =
-          HashCodeUtil.calculateLongHashCode(longValue0) >>> 16 ^
-          HashCodeUtil.calculateLongHashCode(longValue1);
+      hashcode = HashCodeUtil.calculateTwoLongHashCode(longValue0, longValue1);
     }
   }
 