diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 228fa72..8ed3fdb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -70,7 +70,6 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre } } - @Override public JoinUtil.JoinResult contains(long key, VectorMapJoinHashMultiSetResult hashMultiSetResult) { @@ -78,15 +77,21 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre (VectorMapJoinFastHashMultiSet.HashMultiSetResult) hashMultiSetResult; optimizedHashMultiSetResult.forget(); + final JoinUtil.JoinResult joinResult; - long hashCode = HashCodeUtil.calculateLongHashCode(key); - long count = findReadSlot(key, hashCode); - JoinUtil.JoinResult joinResult; - if (count == -1) { - joinResult = JoinUtil.JoinResult.NOMATCH; - } else { - optimizedHashMultiSetResult.set(count); + if (isWithinSimpleRange(key)) { joinResult = JoinUtil.JoinResult.MATCH; + optimizedHashMultiSetResult.set(1); + } else { + long hashCode = HashCodeUtil.calculateLongHashCode(key); + long count = findReadSlot(key, hashCode); + + if (count == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + optimizedHashMultiSetResult.set(count); + joinResult = JoinUtil.JoinResult.MATCH; + } } optimizedHashMultiSetResult.setJoinResult(joinResult); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..d90694a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -57,6 +57,12 @@ public void testPutRow(long currentKey) throws HiveException, IOException { } @Override + protected boolean hasRepeatedKeys() { + // the repetition is ignored in the HashSet + return false; + } + + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { int pairIndex = 2 * slot; @@ -75,13 +81,22 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { optimizedHashSetResult.forget(); - long hashCode = HashCodeUtil.calculateLongHashCode(key); - long existance = findReadSlot(key, hashCode); - JoinUtil.JoinResult joinResult; - if (existance == -1) { - joinResult = JoinUtil.JoinResult.NOMATCH; - } else { + final JoinUtil.JoinResult joinResult; + + if (isWithinSimpleRange(key)) { + // this range shortcut is applied here rather than within the operators + // because of optimizedHashSetResult type safety checks for + // optimized/fast hash results joinResult = JoinUtil.JoinResult.MATCH; + } else { + long hashCode = HashCodeUtil.calculateLongHashCode(key); + long existance = findReadSlot(key, hashCode); + + if (existance == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + joinResult = JoinUtil.JoinResult.MATCH; + } } optimizedHashSetResult.setJoinResult(joinResult); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..be4d9e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -56,6 +56,7 @@ private final boolean useMinMax; private long min; private long max; + private boolean repeatedKeys = false; @Override public boolean useMinMax() { @@ -72,6 +73,28 @@ 
public long max() { return max; } + protected boolean hasRepeatedKeys() { + return repeatedKeys; + } + + public final boolean isSimpleRange() { + if (!useMinMax) { + return false; + } + if (hasRepeatedKeys()) { + return false; + } + // e.g. min = 1, max = 10 covers (10 - 1) + 1 = 10 keys + if ((max - min) + 1 == keysAssigned) { + return true; + } + return false; + } + + public final boolean isWithinSimpleRange(long key) { + return isSimpleRange() && key >= min && key <= max; + } + @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); @@ -150,6 +173,8 @@ public void add(long key, BytesWritable currentValue) { max = key; } } + } else { + repeatedKeys = true; } } @@ -282,6 +307,7 @@ public VectorMapJoinFastLongHashTable( useMinMax = minMaxEnabled; min = Long.MAX_VALUE; max = Long.MIN_VALUE; + repeatedKeys = false; } @Override