diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 1ac72c6..765dce2 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -626,10 +626,22 @@ public static int getBucketNumber(int hashCode, int numberOfBuckets) {
    * @return
    */
   public static int getBucketHashCode(Object[] bucketFields, ObjectInspector[] bucketFieldInspectors) {
+    return getBucketHashCode(bucketFields, bucketFieldInspectors, 31);
+  }
+
+  /**
+   * Computes the hash code for the given bucketed fields using the
+   * specified multiplier seed; Hive's standard bucketing contract uses 31.
+   * @param bucketFields field values to hash
+   * @param bucketFieldInspectors inspectors matching {@code bucketFields}
+   * @param hashSeed multiplier used to combine per-field hashes
+   * @return combined hash code of all bucket fields
+   */
+  public static int getBucketHashCode(Object[] bucketFields, ObjectInspector[] bucketFieldInspectors, int hashSeed) {
     int hashCode = 0;
     for (int i = 0; i < bucketFields.length; i++) {
       int fieldHash = ObjectInspectorUtils.hashCode(bucketFields[i], bucketFieldInspectors[i]);
-      hashCode = 31 * hashCode + fieldHash;
+      hashCode = hashSeed * hashCode + fieldHash;
     }
     return hashCode;
   }