diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index f272b6d..9a35fdc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -132,6 +132,10 @@ // to output batch scratch columns for the small table portion. protected VectorColumnSourceMapping smallTableMapping; + // These are the output columns for the small table and the outer small table keys. + protected int[] smallTableOutputVectorColumns; + protected int[] bigTableOuterKeyOutputVectorColumns; + // These are the columns in the big and small table that are ByteColumnVector columns. // We create data buffers for these columns so we can copy strings into those columns by value. protected int[] bigTableByteColumnVectorColumns; @@ -415,6 +419,9 @@ protected void determineCommonInfo(boolean isOuter) { smallTableMapping.finalize(); + bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); + smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); + // Which big table and small table columns are ByteColumnVector and need have their data buffer // to be manually reset for some join result processing? diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 743a975..ac4fa65 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -329,7 +329,7 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, } if (bigTableVectorCopyOuterKeys != null) { bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableOuterKeyMapping.getOutputColumns()) { + for (int column : bigTableOuterKeyOutputVectorColumns) { overflowBatch.cols[column].isRepeating = true; } } @@ -345,7 +345,7 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, } if (bigTableVectorCopyOuterKeys != null) { - for (int column : bigTableOuterKeyMapping.getOutputColumns()) { + for (int column : bigTableOuterKeyOutputVectorColumns) { ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index c998252..8f85f34 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -124,6 +125,14 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; + // For join operators that can generate small table results, reset their + // (target) scratch columns. + + for (int column : smallTableOutputVectorColumns) { + ColumnVector smallTableColumn = batch.cols[column]; + smallTableColumn.reset(); + } + // For inner joins, we may apply the filter(s) now. for(VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index e426476..88a4e34 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -128,6 +129,14 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; + // For join operators that can generate small table results, reset their + // (target) scratch columns. + + for (int column : smallTableOutputVectorColumns) { + ColumnVector smallTableColumn = batch.cols[column]; + smallTableColumn.reset(); + } + // For inner joins, we may apply the filter(s) now. for(VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index 3bc225a..8ba044b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -115,6 +116,14 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; + // For join operators that can generate small table results, reset their + // (target) scratch columns. + + for (int column : smallTableOutputVectorColumns) { + ColumnVector smallTableColumn = batch.cols[column]; + smallTableColumn.reset(); + } + // For inner joins, we may apply the filter(s) now. for(VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 3309921..51a8282 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -413,7 +413,7 @@ protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[] // Mark any scratch small table scratch columns that would normally receive a copy of the // key as null and repeating. - for (int column : bigTableOuterKeyMapping.getOutputColumns()) { + for (int column : bigTableOuterKeyOutputVectorColumns) { ColumnVector colVector = batch.cols[column]; colVector.isRepeating = true; colVector.noNulls = false; @@ -421,7 +421,7 @@ protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[] } // Small table values are set to null and repeating. - for (int column : smallTableMapping.getOutputColumns()) { + for (int column : smallTableOutputVectorColumns) { ColumnVector colVector = batch.cols[column]; colVector.isRepeating = true; colVector.noNulls = false; @@ -442,14 +442,14 @@ protected int generateOuterHashMapNoMatchResults(VectorizedRowBatch batch, int[] // Mark any scratch small table scratch columns that would normally receive a copy of the // key as null, too. - for (int column : bigTableOuterKeyMapping.getOutputColumns()) { + for (int column : bigTableOuterKeyOutputVectorColumns) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[batchIndex] = true; } // Small table values are set to null. - for (int column : smallTableMapping.getOutputColumns()) { + for (int column : smallTableOutputVectorColumns) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[batchIndex] = true; @@ -573,7 +573,7 @@ protected int generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hiv batch.selectedInUse = true; } - for (int column : smallTableMapping.getOutputColumns()) { + for (int column : smallTableOutputVectorColumns) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; @@ -582,7 +582,7 @@ protected int generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hiv // Mark any scratch small table scratch columns that would normally receive a copy of the key // as null, too. - for (int column : bigTableOuterKeyMapping.getOutputColumns()) { + for (int column : bigTableOuterKeyOutputVectorColumns) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index 8f18672..09094c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -123,6 +124,19 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; + // For join operators that can generate small table results, reset their + // (target) scratch columns. + + for (int column : smallTableOutputVectorColumns) { + ColumnVector smallTableColumn = batch.cols[column]; + smallTableColumn.reset(); + } + + for (int column : bigTableOuterKeyOutputVectorColumns) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + // For outer join, DO NOT apply filters yet. It is incorrect for outer join to // apply the filter before hash table matching. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index ffee959..7e77068 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -128,6 +128,19 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; + // For join operators that can generate small table results, reset their + // (target) scratch columns. + + for (int column : smallTableOutputVectorColumns) { + ColumnVector smallTableColumn = batch.cols[column]; + smallTableColumn.reset(); + } + + for (int column : bigTableOuterKeyOutputVectorColumns) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + // For outer join, DO NOT apply filters yet. It is incorrect for outer join to // apply the filter before hash table matching. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 5167c19..a3cfcb9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -115,6 +116,19 @@ public void process(Object row, int tag) throws HiveException { batchCounter++; + // For join operators that can generate small table results, reset their + // (target) scratch columns. + + for (int column : smallTableOutputVectorColumns) { + ColumnVector smallTableColumn = batch.cols[column]; + smallTableColumn.reset(); + } + + for (int column : bigTableOuterKeyOutputVectorColumns) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + // For outer join, DO NOT apply filters yet. It is incorrect for outer join to // apply the filter before hash table matching.