diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt index 0a95962..21062c6 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -64,70 +65,43 @@ public class extends VectorExpression { return; } - if (inputColVector1.noNulls && inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!(vector1[0] vector2[0])) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (vector1[0] vector2[i]) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[0] vector2[i]) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (vector1[i] vector2[0]) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (vector1[i] vector2[0]) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { + // filter rows with NULL on left input + int newSize; + newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // filter rows with NULL on right input + newSize = NullUtil.filterNulls(batch.cols[colNum2], batch.selectedInUse, sel, n); + if (newSize < n) { + n = batch.size = newSize; + batch.selectedInUse = true; + } + + // All rows with nulls have been filtered out, so just do normal filter for non-null case + if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { + + // All must be selected otherwise size would be zero + // Repeating property will not change. + if (!(vector1[0] vector2[0])) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { int i = sel[j]; - if (vector1[i] vector2[i]) { + if (vector1[0] vector2[i]) { sel[newSize++] = i; } } batch.size = newSize; } else { - int newSize = 0; + newSize = 0; for(int i = 0; i != n; i++) { - if (vector1[i] vector2[i]) { + if (vector1[0] vector2[i]) { sel[newSize++] = i; } } @@ -136,88 +110,42 @@ public class extends VectorExpression { batch.selectedInUse = true; } } - } else if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || nullPos2[0]) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - batch.size = 0; - } else { - if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (vector1[0] vector2[i]) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (vector1[0] vector2[i]) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] vector2[0]) { + sel[newSize++] = i; } } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - batch.size = 0; + batch.size = newSize; } else { - if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (vector1[i] vector2[0]) { - sel[newSize++] = i; - } - } + newSize = 0; + for(int i = 0; i != n; i++) { + if (vector1[i] vector2[0]) { + sel[newSize++] = i; } + } + if (newSize < batch.size) { batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (vector1[i] vector2[0]) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } + batch.selectedInUse = true; } } } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { + newSize = 0; + for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos1[i] && !nullPos2[i]) { - if (vector1[i] vector2[i]) { - sel[newSize++] = i; - } + if (vector1[i] vector2[i]) { + sel[newSize++] = i; } } batch.size = newSize; } else { - int newSize = 0; + newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos1[i] && !nullPos2[i]) { - if (vector1[i] vector2[i]) { - sel[newSize++] = i; - } + if (vector1[i] vector2[i]) { + sel[newSize++] = i; } } if (newSize < batch.size) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java index 18f63f5..258ed86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -317,4 +317,39 @@ public static void setNullDataEntriesDecimal( } } } + + /** + * Filter out rows with null values. Return the number of rows in the batch. + */ + public static int filterNulls(ColumnVector v, boolean selectedInUse, int[] sel, int n) { + int newSize = 0; + + if (v.noNulls) { + + // no rows will be filtered + return n; + } + + if (v.isRepeating) { + + // all rows are filtered if repeating null, otherwise no rows are filtered + return v.isNull[0] ? 0 : n; + } + + if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (!v.isNull[i]) { + sel[newSize++] = i; + } + } + } else { + for (int i = 0; i != n; i++) { + if (!v.isNull[i]) { + sel[newSize++] = i; + } + } + } + return newSize; + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java index 8560117..bcb6d6a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java @@ -62,31 +62,60 @@ public void testFilterLongColEqualLongScalar() { } @Test - public void testFilterLongColEqualLongColumn() { + public void testFilterLongColGreaterLongColumn() { int seed = 17; - VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch( + VectorizedRowBatch b = VectorizedRowGroupGenUtil.getVectorizedRowBatch( VectorizedRowBatch.DEFAULT_SIZE, 2, seed); - LongColumnVector lcv0 = (LongColumnVector) vrg.cols[0]; - LongColumnVector lcv1 = (LongColumnVector) vrg.cols[1]; + LongColumnVector lcv0 = (LongColumnVector) b.cols[0]; + LongColumnVector lcv1 = (LongColumnVector) b.cols[1]; + b.size = 3; FilterLongColGreaterLongColumn expr = new FilterLongColGreaterLongColumn(0, 1); - //Basic case - lcv0.vector[1] = 23; - lcv1.vector[1] = 19; - lcv0.vector[5] = 23; - lcv1.vector[5] = 19; - expr.evaluate(vrg); - assertEquals(2, vrg.size); - assertEquals(1, vrg.selected[0]); - assertEquals(5, vrg.selected[1]); + // Basic case + lcv0.vector[0] = 10; + lcv0.vector[1] = 10; + lcv0.vector[2] = 10; + lcv1.vector[0] = 20; + lcv1.vector[1] = 1; + lcv1.vector[2] = 7; - //handle null + expr.evaluate(b); + assertEquals(2, b.size); + assertEquals(1, b.selected[0]); + assertEquals(2, b.selected[1]); + + // handle null with selected in use lcv0.noNulls = false; lcv0.isNull[1] = true; - expr.evaluate(vrg); - assertEquals(1, vrg.size); - assertEquals(5, vrg.selected[0]); + expr.evaluate(b); + assertEquals(1, b.size); + assertEquals(2, b.selected[0]); + + // handle repeating + b.size = 3; + b.selectedInUse = false; + lcv0.isRepeating = true; + lcv0.noNulls = true; + expr.evaluate(b); + assertEquals(2, b.size); + + // handle repeating null + b.size = 3; + b.selectedInUse = false; + lcv0.isNull[0] = true; + lcv0.noNulls = false; + expr.evaluate(b); + assertEquals(0, b.size); + + // handle null on both sizes (not repeating) + b.size = 3; + b.selectedInUse = false; + lcv0.isRepeating = false; + lcv1.noNulls = false; + lcv1.isNull[2] = true; + expr.evaluate(b); + assertEquals(0, b.size); } @Test