diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index add8b9c997..d314b6adc0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -154,7 +154,7 @@ public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker mat public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 5ec90b40b9..c384c09193 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -51,7 +51,7 @@ public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 7c73aa6f9c..19b57911a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -46,7 +46,7 @@ public VectorMapJoinHashSetResult createHashSetResult() { public void 
add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 3d45a54728..79b39b4de5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -62,8 +62,8 @@ protected void expandAndRehash() { if (logicalHashBucketCount > HIGHEST_INT_POWER_OF_2) { throwExpandError(HIGHEST_INT_POWER_OF_2, "Bytes"); } - int newLogicalHashBucketCount = logicalHashBucketCount * 2; - int newLogicalHashBucketMask = newLogicalHashBucketCount - 1; + final int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2); + final int newLogicalHashBucketMask = newLogicalHashBucketCount - 1; int newMetricPutConflict = 0; int newLargestNumberOfSteps = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index 2d05eab6bd..248d1255b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -29,6 +29,9 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); + // when rehashing, jump directly to 4k items + public static final int FIRST_SIZE_UP = 4096; + protected final boolean isFullOuter; protected int logicalHashBucketCount; @@ -98,6 +101,12 @@ public int size() { return keysAssigned; } + 
protected final boolean checkResize() { + // resize small hashtables up to a higher width (4096 items), but only when there are collisions + return (resizeThreshold <= keysAssigned) + || (logicalHashBucketCount <= FIRST_SIZE_UP && largestNumberOfSteps > 1); + } + @Override public long getEstimatedMemorySize() { int size = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index 03ef249241..a35401d9b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -96,7 +96,7 @@ public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) public void add(long key, BytesWritable currentValue) { - if (resizeThreshold <= keysAssigned) { + if (checkResize()) { expandAndRehash(); } @@ -157,7 +157,7 @@ private void expandAndRehash() { if (logicalHashBucketCount > ONE_QUARTER_LIMIT) { throwExpandError(ONE_QUARTER_LIMIT, "Long"); } - int newLogicalHashBucketCount = logicalHashBucketCount * 2; + int newLogicalHashBucketCount = Math.max(FIRST_SIZE_UP, logicalHashBucketCount * 2); int newLogicalHashBucketMask = newLogicalHashBucketCount - 1; int newMetricPutConflict = 0; int newLargestNumberOfSteps = 0; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java index 9bf8bbc734..0308daf400 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java @@ -192,7 +192,7 @@ public void testExpand() throws Exception { VerifyFastBytesHashMap verifyTable
= new VerifyFastBytesHashMap(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { byte[] key; while (true) { key = new byte[random.nextInt(MAX_KEY_LENGTH)]; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java index c6a8f7ac9f..507544eaaf 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java @@ -172,7 +172,7 @@ public void testExpand() throws Exception { VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { byte[] key; while (true) { key = new byte[random.nextInt(MAX_KEY_LENGTH)]; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java index 6fccde9600..1cd6d4df2a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -173,7 +173,7 @@ public void testExpand() throws Exception { VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { byte[] key; while (true) { key = new byte[random.nextInt(MAX_KEY_LENGTH)]; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java index a21bdcf34c..f72704c8d0 100644 --- 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java @@ -188,7 +188,7 @@ public void testExpand() throws Exception { VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { long key; while (true) { key = random.nextLong(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java index 5847787b37..d40351a215 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java @@ -104,7 +104,7 @@ public void testExpand() throws Exception { VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { long key; while (true) { key = random.nextLong(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java index 39f66329a9..d873ca66b5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java @@ -173,7 +173,7 @@ public void testExpand() throws Exception { VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { long key; while (true) { key = random.nextLong(); diff --git 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java index bbb5da0d59..97d437506f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -171,7 +171,7 @@ public void testExpand() throws Exception { VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); - for (int i = 0; i < 18; ++i) { + for (int i = 0; i < 6; ++i) { long key; while (true) { key = random.nextLong();