From aef4b3ab52d87a68a6a60995620cd6452d16332a Mon Sep 17 00:00:00 2001 From: Gopal V Date: Thu, 29 Nov 2018 00:05:39 -0800 Subject: [PATCH] HIVE-20983: Vectorization: Scale up small hashtables, when collisions are detected (Gopal V) s/min/max/ --- .../exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java | 2 +- .../vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java | 2 +- .../exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java | 2 +- .../vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java | 4 ++-- .../ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java | 9 +++++++++ .../exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java | 4 ++-- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index add8b9c..d314b6a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -154,7 +154,7 @@ public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker mat public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 5ec90b4..c384c09 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -51,7 +51,7 @@ public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { 
public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 7c73aa6..19b5791 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -46,7 +46,7 @@ public VectorMapJoinHashSetResult createHashSetResult() { public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 3d45a54..79b39b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -62,8 +62,8 @@ protected void expandAndRehash() { if (logicalHashBucketCount > HIGHEST_INT_POWER_OF_2) { throwExpandError(HIGHEST_INT_POWER_OF_2, "Bytes"); } - int newLogicalHashBucketCount = logicalHashBucketCount * 2; - int newLogicalHashBucketMask = newLogicalHashBucketCount - 1; + final int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2); + final int newLogicalHashBucketMask = newLogicalHashBucketCount - 1; int newMetricPutConflict = 0; int newLargestNumberOfSteps = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index 2d05eab..248d125 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -29,6 +29,9 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); + // when rehashing, jump directly to at least 4k buckets + public static final int FIRST_SIZE_UP = 4096; + protected final boolean isFullOuter; protected int logicalHashBucketCount; @@ -98,6 +101,12 @@ public int size() { return keysAssigned; } + protected final boolean checkResize() { + // also resize small hashtables (at most 4096 buckets) early, but only when there are collisions + return (resizeThreshold <= keysAssigned) + || (logicalHashBucketCount <= FIRST_SIZE_UP && largestNumberOfSteps > 1); + } + @Override public long getEstimatedMemorySize() { int size = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index 03ef249..a35401d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -96,7 +96,7 @@ public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) public void add(long key, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } @@ -157,7 +157,7 @@ private void expandAndRehash() { if (logicalHashBucketCount > ONE_QUARTER_LIMIT) { throwExpandError(ONE_QUARTER_LIMIT, "Long"); } - int newLogicalHashBucketCount = logicalHashBucketCount * 2; + int
newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2); int newLogicalHashBucketMask = newLogicalHashBucketCount - 1; int newMetricPutConflict = 0; int newLargestNumberOfSteps = 0; -- 2.4.0