diff --git common/src/java/org/apache/hive/common/util/HashCodeUtil.java common/src/java/org/apache/hive/common/util/HashCodeUtil.java
index 1330cbe..d763b13 100644
--- common/src/java/org/apache/hive/common/util/HashCodeUtil.java
+++ common/src/java/org/apache/hive/common/util/HashCodeUtil.java
@@ -47,7 +47,9 @@ public static void calculateLongArrayHashCodes(long[] longs, int[] hashCodes, fi
     }
   }
 
+  @Deprecated
   public static int calculateBytesHashCode(byte[] keyBytes, int keyStart, int keyLength) {
+    // Don't use this for ReduceSinkOperators
     return murmurHash(keyBytes, keyStart, keyLength);
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keyseries/VectorKeySeriesSerializedImpl.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keyseries/VectorKeySeriesSerializedImpl.java
index 77c9ecc..f81c636 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keyseries/VectorKeySeriesSerializedImpl.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/keyseries/VectorKeySeriesSerializedImpl.java
@@ -93,9 +93,8 @@ public int getSerializedLength() {
    * Batch compute the hash codes for all the serialized keys.
    *
    * NOTE: MAJOR MAJOR ASSUMPTION:
-   * We assume that HashCodeUtil.murmurHash produces the same result
-   * as MurmurHash.hash with seed = 0 (the method used by ReduceSinkOperator for
-   * UNIFORM distribution).
+   * We use Murmur3.hash32(seed=0) across the board for the ReduceSink UNIFORM distribution.
+   * Previous use of HashCodeUtil is deprecated.
    */
   protected void computeSerializedHashCodes() {
     int offset = 0;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
index 324aa97..728c4bd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/reducesink/VectorReduceSinkUniformHashOperator.java
@@ -28,7 +28,9 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hive.common.util.HashCodeUtil;
+import org.apache.hive.common.util.Murmur3;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -86,7 +88,7 @@ protected void initializeOp(Configuration hconf) throws HiveException {
       int nullBytesLength = nullKeyOutput.getLength();
       nullBytes = new byte[nullBytesLength];
       System.arraycopy(nullKeyOutput.getData(), 0, nullBytes, 0, nullBytesLength);
-      nullKeyHashCode = HashCodeUtil.calculateBytesHashCode(nullBytes, 0, nullBytesLength);
+      nullKeyHashCode = Murmur3.hash32(nullBytes, 0, nullBytesLength, 0);
     } catch (Exception e) {
       throw new HiveException(e);
     }
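
Reviewer note (not part of the patch): the sketch below shows the single hash convention this change standardizes on: Murmur3 32-bit with seed 0 over the serialized key bytes, via the same Murmur3.hash32(byte[], int, int, int) overload called in the hunks above. The class name and the reducer-bucket arithmetic are illustrative assumptions, not the actual ReduceSinkOperator code.

import java.nio.charset.StandardCharsets;

import org.apache.hive.common.util.Murmur3;

// Illustrative sketch only; assumes hive-common is on the classpath.
public class UniformHashSketch {

  public static void main(String[] args) {
    byte[] serializedKey = "example-key".getBytes(StandardCharsets.UTF_8);

    // The call both the vectorized key-series path and the null-key path now make:
    // Murmur3 32-bit hash, seed = 0, over the serialized key bytes.
    int hashCode = Murmur3.hash32(serializedKey, 0, serializedKey.length, 0);

    // Hypothetical reducer selection for a UNIFORM distribution over N reducers;
    // masking with Integer.MAX_VALUE keeps the bucket index non-negative.
    int numReducers = 16;
    int bucket = (hashCode & Integer.MAX_VALUE) % numReducers;

    System.out.println("hashCode=" + hashCode + " bucket=" + bucket);
  }
}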