diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index e8b722e..9e77d22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -179,13 +179,17 @@ public void process(Object row, int tag) throws HiveException { * Single-Column Long specific repeated lookup. */ - long key = vector[0]; JoinUtil.JoinResult joinResult; - if (useMinMax && (key < min || key > max)) { - // Out of range for whole batch. + if (!joinColVector.noNulls && joinColVector.isNull[0]) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashMultiSet.contains(key, hashMultiSetResults[0]); + long key = vector[0]; + if (useMinMax && (key < min || key > max)) { + // Out of range for whole batch. + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + joinResult = hashMultiSet.contains(key, hashMultiSetResults[0]); + } } /* @@ -235,13 +239,21 @@ public void process(Object row, int tag) throws HiveException { * Single-Column Long get key. */ - long currentKey = vector[batchIndex]; + long currentKey; + boolean isNull; + if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) { + currentKey = 0; + isNull = true; + } else { + currentKey = vector[batchIndex]; + isNull = false; + } /* * Equal key series checking. */ - if (!haveSaveKey || currentKey != saveKey) { + if (isNull || !haveSaveKey || currentKey != saveKey) { // New key. @@ -261,25 +273,30 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Single-Column Long specific save key. 
- */ - - saveKey = currentKey; - - /* - * Single-Column Long specific lookup key. - */ - - if (useMinMax && (currentKey < min || currentKey > max)) { - // Key out of range for whole hash table. + if (isNull) { saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; } else { - saveJoinResult = hashMultiSet.contains(currentKey, hashMultiSetResults[hashMultiSetResultCount]); + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Single-Column Long specific save key. + */ + + saveKey = currentKey; + + /* + * Single-Column Long specific lookup key. + */ + + if (useMinMax && (currentKey < min || currentKey > max)) { + // Key out of range for whole hash table. + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + } else { + saveJoinResult = hashMultiSet.contains(currentKey, hashMultiSetResults[hashMultiSetResultCount]); + } } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java index e016013..b1a315f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java @@ -196,13 +196,14 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.setOutput(currentKeyOutput); keyVectorSerializeWrite.serializeWrite(batch, 0); + JoinUtil.JoinResult joinResult; if (keyVectorSerializeWrite.getHasAnyNulls()) { - // Not expecting NULLs in MapJoin -- they should have been filtered out. 
- throw new HiveException("Null key not expected in MapJoin"); + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + joinResult = hashMultiSet.contains(keyBytes, 0, keyLength, hashMultiSetResults[0]); } - byte[] keyBytes = currentKeyOutput.getData(); - int keyLength = currentKeyOutput.getLength(); - JoinUtil.JoinResult joinResult = hashMultiSet.contains(keyBytes, 0, keyLength, hashMultiSetResults[0]); /* * Common repeated join result processing. @@ -254,16 +255,13 @@ public void process(Object row, int tag) throws HiveException { // Generate binary sortable key for current row in vectorized row batch. keyVectorSerializeWrite.setOutput(currentKeyOutput); keyVectorSerializeWrite.serializeWrite(batch, batchIndex); - if (keyVectorSerializeWrite.getHasAnyNulls()) { - // Not expecting NULLs in MapJoin -- they should have been filtered out. - throw new HiveException("Null key not expected in MapJoin"); - } + boolean isAllNull = keyVectorSerializeWrite.getHasAnyNulls(); /* * Equal key series checking. */ - if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) { + if (isAllNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) { // New key. @@ -283,25 +281,30 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Multi-Key specific save key. - */ - - temp = saveKeyOutput; - saveKeyOutput = currentKeyOutput; - currentKeyOutput = temp; - - /* - * Single-Column Long specific lookup key. 
- */ - - byte[] keyBytes = saveKeyOutput.getData(); - int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashMultiSet.contains(keyBytes, 0, keyLength, hashMultiSetResults[hashMultiSetResultCount]); + if (isAllNull) { + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; + } else { + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Multi-Key specific save key. + */ + + temp = saveKeyOutput; + saveKeyOutput = currentKeyOutput; + currentKeyOutput = temp; + + /* + * Multi-Key specific lookup key. + */ + + byte[] keyBytes = saveKeyOutput.getData(); + int keyLength = saveKeyOutput.getLength(); + saveJoinResult = hashMultiSet.contains(keyBytes, 0, keyLength, hashMultiSetResults[hashMultiSetResultCount]); + } /* * Common inner big-only join result processing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index c07d353..2711b10 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -175,10 +175,15 @@ public void process(Object row, int tag) throws HiveException { * Single-Column String specific repeated lookup. 
*/ - byte[] keyBytes = vector[0]; - int keyStart = start[0]; - int keyLength = length[0]; - JoinUtil.JoinResult joinResult = hashMultiSet.contains(keyBytes, keyStart, keyLength, hashMultiSetResults[0]); + JoinUtil.JoinResult joinResult; + if (!joinColVector.noNulls && joinColVector.isNull[0]) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + byte[] keyBytes = vector[0]; + int keyStart = start[0]; + int keyLength = length[0]; + joinResult = hashMultiSet.contains(keyBytes, keyStart, keyLength, hashMultiSetResults[0]); + } /* * Common repeated join result processing. @@ -228,12 +233,13 @@ public void process(Object row, int tag) throws HiveException { */ // Implicit -- use batchIndex. + boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex]; /* * Equal key series checking. */ - if (!haveSaveKey || + if (isNull || !haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) { @@ -255,24 +261,29 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Single-Column String specific save key. - */ - - saveKeyBatchIndex = batchIndex; - - /* - * Single-Column String specific lookup key. - */ - - byte[] keyBytes = vector[batchIndex]; - int keyStart = start[batchIndex]; - int keyLength = length[batchIndex]; - saveJoinResult = hashMultiSet.contains(keyBytes, keyStart, keyLength, hashMultiSetResults[hashMultiSetResultCount]); + if (isNull) { + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; + } else { + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Single-Column String specific save key. 
+ */ + + saveKeyBatchIndex = batchIndex; + + /* + * Single-Column String specific lookup key. + */ + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + saveJoinResult = hashMultiSet.contains(keyBytes, keyStart, keyLength, hashMultiSetResults[hashMultiSetResultCount]); + } /* * Common inner big-only join result processing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 92d7328..0197225 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -177,13 +177,17 @@ public void process(Object row, int tag) throws HiveException { * Single-Column Long specific repeated lookup. */ - long key = vector[0]; JoinUtil.JoinResult joinResult; - if (useMinMax && (key < min || key > max)) { - // Out of range for whole batch. + if (!joinColVector.noNulls && joinColVector.isNull[0]) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashMap.lookup(key, hashMapResults[0]); + long key = vector[0]; + if (useMinMax && (key < min || key > max)) { + // Out of range for whole batch. + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + joinResult = hashMap.lookup(key, hashMapResults[0]); + } } /* @@ -233,13 +237,21 @@ public void process(Object row, int tag) throws HiveException { * Single-Column Long get key. */ - long currentKey = vector[batchIndex]; + long currentKey; + boolean isNull; + if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) { + currentKey = 0; + isNull = true; + } else { + currentKey = vector[batchIndex]; + isNull = false; + } /* * Equal key series checking. */ - if (!haveSaveKey || currentKey != saveKey) { + if (isNull || !haveSaveKey || currentKey != saveKey) { // New key. 
@@ -258,25 +270,30 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Single-Column Long specific save key. - */ - - saveKey = currentKey; - - /* - * Single-Column Long specific lookup key. - */ - - if (useMinMax && (currentKey < min || currentKey > max)) { - // Key out of range for whole hash table. + if (isNull) { saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; } else { - saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Single-Column Long specific save key. + */ + + saveKey = currentKey; + + /* + * Single-Column Long specific lookup key. + */ + + if (useMinMax && (currentKey < min || currentKey > max)) { + // Key out of range for whole hash table. 
+ saveJoinResult = JoinUtil.JoinResult.NOMATCH; + } else { + saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); + } } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index eb78174..e35ae21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -193,13 +193,14 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.setOutput(currentKeyOutput); keyVectorSerializeWrite.serializeWrite(batch, 0); + JoinUtil.JoinResult joinResult; if (keyVectorSerializeWrite.getHasAnyNulls()) { - // Not expecting NULLs in MapJoin -- they should have been filtered out. - throw new HiveException("Null key not expected in MapJoin"); + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]); } - byte[] keyBytes = currentKeyOutput.getData(); - int keyLength = currentKeyOutput.getLength(); - JoinUtil.JoinResult joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]); /* * Common repeated join result processing. @@ -251,16 +252,13 @@ public void process(Object row, int tag) throws HiveException { // Generate binary sortable key for current row in vectorized row batch. keyVectorSerializeWrite.setOutput(currentKeyOutput); keyVectorSerializeWrite.serializeWrite(batch, batchIndex); - if (keyVectorSerializeWrite.getHasAnyNulls()) { - // Not expecting NULLs in MapJoin -- they should have been filtered out. 
- throw new HiveException("Null key not expected in MapJoin"); - } + boolean isAllNull = keyVectorSerializeWrite.getHasAnyNulls(); /* * Equal key series checking. */ - if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) { + if (isAllNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) { // New key. @@ -279,25 +277,30 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Multi-Key specific save key. - */ - - temp = saveKeyOutput; - saveKeyOutput = currentKeyOutput; - currentKeyOutput = temp; - - /* - * Multi-Key specific lookup key. - */ - - byte[] keyBytes = saveKeyOutput.getData(); - int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); + if (isAllNull) { + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; + } else { + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Multi-Key specific save key. + */ + + temp = saveKeyOutput; + saveKeyOutput = currentKeyOutput; + currentKeyOutput = temp; + + /* + * Multi-Key specific lookup key. + */ + + byte[] keyBytes = saveKeyOutput.getData(); + int keyLength = saveKeyOutput.getLength(); + saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); + } /* * Common inner join result processing. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index 4b508d4..b2711c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -169,11 +169,15 @@ public void process(Object row, int tag) throws HiveException { /* * Single-Column String specific repeated lookup. */ - - byte[] keyBytes = vector[0]; - int keyStart = start[0]; - int keyLength = length[0]; - JoinUtil.JoinResult joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]); + JoinUtil.JoinResult joinResult; + if (!joinColVector.noNulls && joinColVector.isNull[0]) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + byte[] keyBytes = vector[0]; + int keyStart = start[0]; + int keyLength = length[0]; + joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]); + } /* * Common repeated join result processing. @@ -223,12 +227,13 @@ public void process(Object row, int tag) throws HiveException { */ // Implicit -- use batchIndex. + boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex]; /* * Equal key series checking. */ - if (!haveSaveKey || + if (isNull || !haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) { @@ -249,24 +254,29 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Single-Column String specific save key. - */ - - saveKeyBatchIndex = batchIndex; - - /* - * Single-Column String specific lookup key. 
- */ - - byte[] keyBytes = vector[batchIndex]; - int keyStart = start[batchIndex]; - int keyLength = length[batchIndex]; - saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); + if (isNull) { + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; + } else { + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Single-Column String specific save key. + */ + + saveKeyBatchIndex = batchIndex; + + /* + * Single-Column String specific lookup key. + */ + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); + } /* * Common inner join result processing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 762b6fa..e6722cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -179,13 +179,17 @@ public void process(Object row, int tag) throws HiveException { * Single-Column Long specific repeated lookup. */ - long key = vector[0]; JoinUtil.JoinResult joinResult; - if (useMinMax && (key < min || key > max)) { - // Out of range for whole batch. + if (!joinColVector.noNulls && joinColVector.isNull[0]) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashSet.contains(key, hashSetResults[0]); + long key = vector[0]; + if (useMinMax && (key < min || key > max)) { + // Out of range for whole batch. 
+ joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + joinResult = hashSet.contains(key, hashSetResults[0]); + } } /* @@ -234,13 +238,21 @@ public void process(Object row, int tag) throws HiveException { * Single-Column Long get key. */ - long currentKey = vector[batchIndex]; + long currentKey; + boolean isNull; + if (!joinColVector.noNulls && joinColVector.isNull[batchIndex]) { + currentKey = 0; + isNull = true; + } else { + currentKey = vector[batchIndex]; + isNull = false; + } /* * Equal key series checking. */ - if (!haveSaveKey || currentKey != saveKey) { + if (isNull || !haveSaveKey || currentKey != saveKey) { // New key. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index a7a51f7..e144c12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -195,14 +195,15 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.setOutput(currentKeyOutput); keyVectorSerializeWrite.serializeWrite(batch, 0); + JoinUtil.JoinResult joinResult; if (keyVectorSerializeWrite.getHasAnyNulls()) { - // Not expecting NULLs in MapJoin -- they should have been filtered out. 
- throw new HiveException("Null key not expected in MapJoin"); + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + // LOG.debug(CLASS_NAME + " processOp all " + displayBytes(keyBytes, 0, keyLength)); + joinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[0]); } - byte[] keyBytes = currentKeyOutput.getData(); - int keyLength = currentKeyOutput.getLength(); - // LOG.debug(CLASS_NAME + " processOp all " + displayBytes(keyBytes, 0, keyLength)); - JoinUtil.JoinResult joinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[0]); /* * Common repeated join result processing. @@ -253,10 +254,7 @@ public void process(Object row, int tag) throws HiveException { // Generate binary sortable key for current row in vectorized row batch. keyVectorSerializeWrite.setOutput(currentKeyOutput); keyVectorSerializeWrite.serializeWrite(batch, batchIndex); - if (keyVectorSerializeWrite.getHasAnyNulls()) { - // Not expecting NULLs in MapJoin -- they should have been filtered out. - throw new HiveException("Null key not expected in MapJoin"); - } + boolean isAllNull = keyVectorSerializeWrite.getHasAnyNulls(); // LOG.debug(CLASS_NAME + " currentKey " + // VectorizedBatchUtil.displayBytes(currentKeyOutput.getData(), 0, currentKeyOutput.getLength())); @@ -265,7 +263,7 @@ public void process(Object row, int tag) throws HiveException { * Equal key series checking. */ - if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) { + if (isAllNull || !haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) { // New key. @@ -284,25 +282,30 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Multi-Key specific save key and lookup. 
- */ - - temp = saveKeyOutput; - saveKeyOutput = currentKeyOutput; - currentKeyOutput = temp; - - /* - * Multi-key specific lookup key. - */ - - byte[] keyBytes = saveKeyOutput.getData(); - int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]); + if (isAllNull) { + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; + } else { + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Multi-Key specific save key and lookup. + */ + + temp = saveKeyOutput; + saveKeyOutput = currentKeyOutput; + currentKeyOutput = temp; + + /* + * Multi-key specific lookup key. + */ + + byte[] keyBytes = saveKeyOutput.getData(); + int keyLength = saveKeyOutput.getLength(); + saveJoinResult = hashSet.contains(keyBytes, 0, keyLength, hashSetResults[hashSetResultCount]); + } /* * Common left-semi join result processing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index eaa3af4..a8d3459 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -172,10 +172,15 @@ public void process(Object row, int tag) throws HiveException { * Single-Column String specific repeated lookup. 
*/ - byte[] keyBytes = vector[0]; - int keyStart = start[0]; - int keyLength = length[0]; - JoinUtil.JoinResult joinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[0]); + JoinUtil.JoinResult joinResult; + if (!joinColVector.noNulls && joinColVector.isNull[0]) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + byte[] keyBytes = vector[0]; + int keyStart = start[0]; + int keyLength = length[0]; + joinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[0]); + } /* * Common repeated join result processing. @@ -224,12 +229,13 @@ public void process(Object row, int tag) throws HiveException { */ // Implicit -- use batchIndex. + boolean isNull = !joinColVector.noNulls && joinColVector.isNull[batchIndex]; /* * Equal key series checking. */ - if (!haveSaveKey || + if (isNull || !haveSaveKey || StringExpr.equal(vector[saveKeyBatchIndex], start[saveKeyBatchIndex], length[saveKeyBatchIndex], vector[batchIndex], start[batchIndex], length[batchIndex]) == false) { @@ -250,24 +256,29 @@ public void process(Object row, int tag) throws HiveException { } } - // Regardless of our matching result, we keep that information to make multiple use - // of it for a possible series of equal keys. - haveSaveKey = true; - - /* - * Single-Column String specific save key and lookup. - */ - - saveKeyBatchIndex = batchIndex; - - /* - * Single-Column String specific lookup key. - */ - - byte[] keyBytes = vector[batchIndex]; - int keyStart = start[batchIndex]; - int keyLength = length[batchIndex]; - saveJoinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[hashSetResultCount]); + if (isNull) { + saveJoinResult = JoinUtil.JoinResult.NOMATCH; + haveSaveKey = false; + } else { + // Regardless of our matching result, we keep that information to make multiple use + // of it for a possible series of equal keys. + haveSaveKey = true; + + /* + * Single-Column String specific save key and lookup. 
+ */ + + saveKeyBatchIndex = batchIndex; + + /* + * Single-Column String specific lookup key. + */ + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + saveJoinResult = hashSet.contains(keyBytes, keyStart, keyLength, hashSetResults[hashSetResultCount]); + } /* * Common left-semi join result processing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index dfc9bf1..f37f056 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -81,12 +81,7 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); if (keyBinarySortableDeserializeRead.readCheckNull()) { - if (isOuterJoin) { - return; - } else { - // For inner join, we expect all NULL values to have been filtered out before now. 
- throw new HiveException("Unexpected NULL in map join small table"); - } + return; } long key = VectorMapJoinFastLongHashUtil.deserializeLongKey( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java index 5c7792f..77c8709 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java @@ -45,12 +45,7 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); if (keyBinarySortableDeserializeRead.readCheckNull()) { - if (isOuterJoin) { - return; - } else { - // For inner join, we expect all NULL values to have been filtered out before now. - throw new HiveException("Unexpected NULL in map join small table"); - } + return; } keyBinarySortableDeserializeRead.readString(readStringResults);