diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index 75983fe..60f4667 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; /** @@ -59,7 +60,7 @@ public Writable getWritableObject(int index) { index = 0; } if (!noNulls && isNull[index]) { - return null; + return NullWritable.get(); } else { writableObj.set(vector[index]); return writableObj; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index bea48cf..3a866ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; /** @@ -59,7 +60,7 @@ public Writable getWritableObject(int index) { index = 0; } if (!noNulls && isNull[index]) { - return null; + return NullWritable.get(); } else { writableObj.set(vector[index]); return writableObj; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java index 704de4b..3595654 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java @@ -24,11 +24,11 @@ * This class represents an Or expression. This applies short circuit optimization. 
*/ public class FilterExprOrExpr extends VectorExpression { - VectorExpression childExpr1; - VectorExpression childExpr2; - int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE]; - int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; - int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; + private final VectorExpression childExpr1; + private final VectorExpression childExpr2; + private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; public FilterExprOrExpr(VectorExpression childExpr1, VectorExpression childExpr2) { this.childExpr1 = childExpr1; @@ -42,15 +42,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } boolean prevSelectInUse = batch.selectedInUse; - //Clone the selected vector - int [] sel = batch.selected; + + // Save the original selected vector + int[] sel = batch.selected; if (batch.selectedInUse) { - for (int i = 0; i < n; i++) { - tmpSelect1[i] = sel[i]; - } + System.arraycopy(sel, 0, initialSelected, 0, n); } else { for (int i = 0; i < n; i++) { - tmpSelect1[i] = i; + initialSelected[i] = i; sel[i] = i; } batch.selectedInUse = true; @@ -58,40 +57,44 @@ public void evaluate(VectorizedRowBatch batch) { childExpr1.evaluate(batch); - //Calculate unselected ones in last evaluate. - for (int i = 0; i < tmp.length; i++) { - tmp[i] = 0; + // Preserve the selected reference and size values generated + // after the first child is evaluated. + int sizeAfterFirstChild = batch.size; + int[] selectedAfterFirstChild = batch.selected; + + // Calculate unselected ones in last evaluate. 
+ for (int j = 0; j < n; j++) { + tmp[initialSelected[j]] = 0; } for (int j = 0; j < batch.size; j++) { - int i = sel[j]; - tmp[i] = 1; + tmp[selectedAfterFirstChild[j]] = 1; } int unselectedSize = 0; - for (int j =0; j < n; j++) { - int i = tmpSelect1[j]; + for (int j = 0; j < n; j++) { + int i = initialSelected[j]; if (tmp[i] == 0) { unselected[unselectedSize++] = i; } } - //Preserve current selected and size - int currentSize = batch.size; - int [] currentSelected = batch.selected; - //Evaluate second child expression over unselected ones only. + // Evaluate second child expression over unselected ones only. batch.selected = unselected; batch.size = unselectedSize; + childExpr2.evaluate(batch); - //Merge the result of last evaluate to previous evaluate. - int newSize = batch.size + currentSize; - for (int i = batch.size; i < newSize; i++ ) { - batch.selected[i] = currentSelected[i-batch.size]; - } + // Merge the result of last evaluate to previous evaluate. + int newSize = batch.size + sizeAfterFirstChild; + System.arraycopy(selectedAfterFirstChild, 0, batch.selected, batch.size, sizeAfterFirstChild); batch.size = newSize; if (newSize == n) { - //Filter didn't do anything + // Filter didn't do anything batch.selectedInUse = prevSelectInUse; } + + // unselected array is taken away by the row batch + // so take the row batch's original one. + unselected = selectedAfterFirstChild; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java index 6f3b101..51a5cec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java @@ -24,10 +24,10 @@ * This class represents an NOT filter expression. This applies short circuit optimization. 
*/ public class FilterNotExpr extends VectorExpression { - VectorExpression childExpr1; - int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE]; - int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; - int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; + private final VectorExpression childExpr1; + private final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + private int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; + private final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; public FilterNotExpr(VectorExpression childExpr1) { this.childExpr1 = childExpr1; @@ -37,19 +37,17 @@ public FilterNotExpr(VectorExpression childExpr1) { public void evaluate(VectorizedRowBatch batch) { int n = batch.size; - if (n<=0) { + if (n <= 0) { return; } - //Clone the selected vector - int [] sel = batch.selected; + // Clone the selected vector + int[] sel = batch.selected; if (batch.selectedInUse) { - for (int i = 0; i < n; i++) { - tmpSelect1[i] = sel[i]; - } + System.arraycopy(sel, 0, initialSelected, 0, n); } else { for (int i = 0; i < n; i++) { - tmpSelect1[i] = i; + initialSelected[i] = i; sel[i] = i; } batch.selectedInUse = true; @@ -57,25 +55,30 @@ public void evaluate(VectorizedRowBatch batch) { childExpr1.evaluate(batch); - //Calculate unselected ones in last evaluate. - for (int i = 0; i < tmp.length; i++) { - tmp[i] = 0; + // Calculate unselected ones in last evaluate. 
+ for (int i = 0; i < n; i++) { + tmp[initialSelected[i]] = 0; } + + // Need to set sel reference again, because the child expression might + // have invalidated the earlier reference + sel = batch.selected; for (int j = 0; j < batch.size; j++) { int i = sel[j]; tmp[i] = 1; } int unselectedSize = 0; - for (int j =0; j < n; j++) { - int i = tmpSelect1[j]; + for (int j = 0; j < n; j++) { + int i = initialSelected[j]; if (tmp[i] == 0) { unselected[unselectedSize++] = i; } } - //The unselected is the new selected + // The unselected is the new selected, swap the arrays batch.selected = unselected; - batch.size = unselectedSize; + unselected = sel; + batch.size = unselectedSize; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java index ef64e4b..1929453 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java @@ -21,15 +21,17 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This expression selects a row if the given boolean column is false. 
+ */ public class SelectColumnIsFalse extends VectorExpression { - int colNum1; + private final int colNum1; - public SelectColumnIsFalse(int colNum1) - { - this.colNum1 = colNum1; - } + public SelectColumnIsFalse(int colNum1) { + this.colNum1 = colNum1; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { @@ -43,29 +45,29 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] nullVector = inputColVector1.isNull; if (n <= 0) { - //Nothing to do + // Nothing to do return; } if (inputColVector1.noNulls) { if (inputColVector1.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. if (vector1[0] == 1) { // All are filtered out batch.size = 0; + return; + } else { + // All are selected; + return; } } else if (batch.selectedInUse) { - int[] newSelected = new int[n]; int newSize = 0; for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 0) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize = 0; for (int i = 0; i != n; i++) { @@ -80,19 +82,22 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (inputColVector1.isRepeating) { - //Repeating and null value - batch.size = 0; + if (nullVector[0] || (vector1[0] == 1)) { + // All are filtered out + batch.size = 0; + } else { + // All are selected; + return; + } } else if (batch.selectedInUse) { - int[] newSelected = new int[n]; int newSize = 0; for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 0 && !nullVector[i]) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize = 0; for (int i = 0; i != n; i++) { @@ -106,7 +111,7 @@ public void evaluate(VectorizedRowBatch batch) { } } } - } + } @Override public int getOutputColumn() { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java index 2d929a8..da9311a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java @@ -21,14 +21,17 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This expression selects a row if the given column is not null. + */ public class SelectColumnIsNotNull extends VectorExpression { - int colNum; + private final int colNum; - public SelectColumnIsNotNull(int colNum) { - this.colNum = colNum; - } + public SelectColumnIsNotNull(int colNum) { + this.colNum = colNum; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { @@ -37,46 +40,47 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - //Note: if type of isNull could be long[], could we just re-use this - //vector as the output vector. No iterations would be needed. 
boolean[] nullPos = inputColVector.isNull; int n = batch.size; if (n <= 0) { - //Nothing to do + // Nothing to do return; } if (inputColVector.noNulls) { - //All selected, do nothing + // All selected, do nothing return; } else if (inputColVector.isRepeating) { - //All must be null - batch.size = 0; + if (nullPos[0]) { + // All are null so none are selected + batch.size = 0; + return; + } else { + // None are null, so all are selected + return; + } } else if (batch.selectedInUse) { - int [] newSelected = new int[n]; - int newSize=0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - newSelected[newSize++] = i; - } - } - batch.selected = newSelected; + int newSize = 0; + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + sel[newSize++] = i; + } + } batch.size = newSize; - } - else { + } else { int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - sel[newSize++] = i; + for (int i = 0; i != n; i++) { + if (!nullPos[i]) { + sel[newSize++] = i; } - } + } if (newSize < n) { batch.selectedInUse = true; batch.size = newSize; } - } - } + } + } @Override public int getOutputColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java index 446f071..9f08b3e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java @@ -21,60 +21,63 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This expression selects a row if the given column is null. 
+ */ public class SelectColumnIsNull extends VectorExpression { - int colNum; + private final int colNum; - public SelectColumnIsNull(int colNum) { - this.colNum = colNum; - } + public SelectColumnIsNull(int colNum) { + this.colNum = colNum; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { super.evaluateChildren(batch); } ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - //Note: if type of isNull could be long[], could we just re-use this - //vector as the output vector. No iterations would be needed. boolean[] nullPos = inputColVector.isNull; int n = batch.size; if (n <= 0) { - //Nothing to do + // Nothing to do return; } if (inputColVector.noNulls) { batch.size = 0; } else if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Selection property will not change. - return; + if (nullPos[0]) { + // All are null, so all must be selected. + return; + } else { + // None are null, so none are selected + batch.size = 0; + return; + } } else if (batch.selectedInUse) { - int [] newSelected = new int[n]; - int newSize=0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (nullPos[i]) { - newSelected[newSize++] = i; - } - } - batch.selected = newSelected; + int newSize = 0; + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (nullPos[i]) { + sel[newSize++] = i; + } + } batch.size = newSize; - } - else { + } else { int newSize = 0; - for(int i = 0; i != n; i++) { - if (nullPos[i]) { - sel[newSize++] = i; + for (int i = 0; i != n; i++) { + if (nullPos[i]) { + sel[newSize++] = i; } - } + } if (newSize < n) { batch.selectedInUse = true; batch.size = newSize; } - } - } + } + } @Override public int getOutputColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java index b1bce44..ded913c 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java @@ -21,14 +21,17 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This expression selects a row if the given boolean column is true. + */ public class SelectColumnIsTrue extends VectorExpression { - int colNum1; + private final int colNum1; - public SelectColumnIsTrue(int colNum1) { - this.colNum1 = colNum1; - } + public SelectColumnIsTrue(int colNum1) { + this.colNum1 = colNum1; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { super.evaluateChildren(batch); @@ -40,29 +43,29 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] nullVector = inputColVector1.isNull; if (n <= 0) { - //Nothing to do + // Nothing to do return; } if (inputColVector1.noNulls) { if (inputColVector1.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. 
if (vector1[0] == 0) { // All are filtered out batch.size = 0; + return; + } else { + // All are selected + return; } } else if (batch.selectedInUse) { - int[] newSelected = new int[n]; int newSize = 0; for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 1) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize = 0; for (int i = 0; i != n; i++) { @@ -76,23 +79,27 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { - if (inputColVector1.isRepeating) { - //Repeating null value - batch.size = 0; + if (inputColVector1.isRepeating) { + if (nullVector[0] || (vector1[0] == 0)) { + // All are filtered + batch.size = 0; + return; + } else { + // All are selected + return; + } } else if (batch.selectedInUse) { - int [] newSelected = new int[n]; - int newSize=0; - for(int j=0; j != n; j++) { + int newSize = 0; + for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 1 && !nullVector[i]) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { - int newSize=0; - for(int i = 0; i != n; i++) { + int newSize = 0; + for (int i = 0; i != n; i++) { if (vector1[i] == 1 && !nullVector[i]) { sel[newSize++] = i; } @@ -103,7 +110,7 @@ public void evaluate(VectorizedRowBatch batch) { } } } - } + } @Override public int getOutputColumn() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index d38cc5d..39d2a3b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -19,19 +19,28 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertTrue; +import java.util.HashSet; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.junit.Assert; import org.junit.Test; - +/** +* Unit tests for logical expressions AND, OR, NOT, IsNull etc. +*/ public class TestVectorLogicalExpressions { + private static final int BOOLEAN_COLUMN_TEST_SIZE = 9; + @Test public void testLongColOrLongCol() { VectorizedRowBatch batch = getBatchThreeBooleanCols(); - ColOrCol expr = new ColOrCol(0,1,2); + ColOrCol expr = new ColOrCol(0, 1, 2); LongColumnVector outCol = (LongColumnVector) batch.cols[2]; expr.evaluate(batch); // verify @@ -67,9 +76,12 @@ public void testLongColOrLongCol() { // try isRepeating path (left input only), no nulls batch = getBatchThreeBooleanCols(); - batch.cols[0].noNulls = true; batch.cols[0].isRepeating = true; - batch.cols[1].noNulls = true; batch.cols[1].isRepeating = false; - batch.cols[2].noNulls = false; batch.cols[2].isRepeating = true; + batch.cols[0].noNulls = true; + batch.cols[0].isRepeating = true; + batch.cols[1].noNulls = true; + batch.cols[1].isRepeating = false; + batch.cols[2].noNulls = false; + batch.cols[2].isRepeating = true; outCol = (LongColumnVector) batch.cols[2]; expr.evaluate(batch); @@ -114,21 +126,23 @@ private VectorizedRowBatch getBatchThreeBooleanCols() { v2.isRepeating = true; // this value should get over-written with correct value v2.noNulls = true; // ditto - batch.size = 9; + batch.size = BOOLEAN_COLUMN_TEST_SIZE; return batch; } @Test public void testBooleanNot() { VectorizedRowBatch batch = getBatchThreeBooleanCols(); - NotCol expr = new NotCol(0,2); + NotCol expr = new NotCol(0, 2); LongColumnVector outCol = (LongColumnVector) batch.cols[2]; expr.evaluate(batch); // Case with nulls Assert.assertFalse(outCol.isRepeating); - Assert.assertEquals(1, outCol.vector[0]); 
Assert.assertFalse(outCol.isNull[0]); - Assert.assertEquals(0, outCol.vector[2]); Assert.assertFalse(outCol.isNull[0]); + Assert.assertEquals(1, outCol.vector[0]); + Assert.assertFalse(outCol.isNull[0]); + Assert.assertEquals(0, outCol.vector[2]); + Assert.assertFalse(outCol.isNull[0]); Assert.assertTrue(outCol.isNull[4]); // No nulls case @@ -145,7 +159,7 @@ public void testBooleanNot() { batch.cols[0].isRepeating = true; batch.cols[0].isNull[0] = true; expr.evaluate(batch); - Assert.assertTrue(outCol.isRepeating);; + Assert.assertTrue(outCol.isRepeating); Assert.assertTrue(outCol.isNull[0]); // isRepeating, and no nulls @@ -160,10 +174,10 @@ public void testBooleanNot() { } @Test - public void testIsNullExpr () { + public void testIsNullExpr() { // has nulls, not repeating VectorizedRowBatch batch = getBatchThreeBooleanCols(); - IsNull expr = new IsNull(0,2); + IsNull expr = new IsNull(0, 2); LongColumnVector outCol = (LongColumnVector) batch.cols[2]; expr.evaluate(batch); Assert.assertEquals(0, outCol.vector[0]); @@ -185,7 +199,8 @@ public void testIsNullExpr () { batch.cols[0].isRepeating = true; batch.cols[0].isNull[0] = true; expr.evaluate(batch); - Assert.assertTrue(outCol.isRepeating);; + Assert.assertTrue(outCol.isRepeating); + ; Assert.assertEquals(1, outCol.vector[0]); Assert.assertTrue(outCol.noNulls); @@ -238,4 +253,162 @@ public void testBooleanFiltersOnColumns() { assertEquals(4, batch.selected[4]); assertEquals(5, batch.selected[5]); } + + @Test + public void testFilterNotExpr() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + VectorizedRowBatch batch2 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr = new SelectColumnIsTrue(0); + FilterNotExpr notExpr = new FilterNotExpr(expr); + + notExpr.evaluate(batch1); + + notExpr.evaluate(batch2); + + assertEquals(batch1.size, batch2.size); + for (int j = 0; j < batch1.size; j++) { + assertEquals(batch1.selected[j], batch2.selected[j]); + int i = j; + assertEquals((((LongColumnVector) 
batch1.cols[0]).vector[i]), + (((LongColumnVector) batch2.cols[0]).vector[i])); + } + } + + @Test + public void testFilterExprOrExpr() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + VectorizedRowBatch batch2 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0); + SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1); + + FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2); + + orExpr.evaluate(batch1); + orExpr.evaluate(batch2); + + assertEquals(batch1.size, batch2.size); + for (int j = 0; j < batch1.size; j++) { + assertEquals(batch1.selected[j], batch2.selected[j]); + int i = j; + assertEquals((((LongColumnVector) batch1.cols[0]).vector[i]), + (((LongColumnVector) batch2.cols[0]).vector[i])); + } + + assertEquals(5, batch1.size); + Set expectedSet = new HashSet(); + expectedSet.add(0); + expectedSet.add(2); + expectedSet.add(3); + expectedSet.add(4); + expectedSet.add(7); + + assertTrue(expectedSet.contains(batch1.selected[0])); + assertTrue(expectedSet.contains(batch1.selected[1])); + assertTrue(expectedSet.contains(batch1.selected[2])); + assertTrue(expectedSet.contains(batch1.selected[3])); + assertTrue(expectedSet.contains(batch1.selected[4])); + + // Repeat the expression on the same batch, + // the result must be unchanged. 
+ orExpr.evaluate(batch1); + + assertEquals(5, batch1.size); + assertTrue(expectedSet.contains(batch1.selected[0])); + assertTrue(expectedSet.contains(batch1.selected[1])); + assertTrue(expectedSet.contains(batch1.selected[2])); + assertTrue(expectedSet.contains(batch1.selected[3])); + assertTrue(expectedSet.contains(batch1.selected[4])); + } + + @Test + public void testFilterExprOrExprWithBatchReuse() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0); + SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1); + + FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2); + + orExpr.evaluate(batch1); + + // Now re-initialize batch1 to simulate batch-object re-use. + for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { + batch1.selected[i] = 0; + } + batch1.size = BOOLEAN_COLUMN_TEST_SIZE; + batch1.selectedInUse = false; + + // Swap column vectors to simulate change in data + ColumnVector tmp = batch1.cols[0]; + batch1.cols[0] = batch1.cols[1]; + batch1.cols[1] = tmp; + + orExpr.evaluate(batch1); + + assertEquals(5, batch1.size); + Set expectedSet = new HashSet(); + expectedSet.add(0); + expectedSet.add(1); + expectedSet.add(3); + expectedSet.add(5); + expectedSet.add(6); + + assertTrue(expectedSet.contains(batch1.selected[0])); + assertTrue(expectedSet.contains(batch1.selected[1])); + assertTrue(expectedSet.contains(batch1.selected[2])); + assertTrue(expectedSet.contains(batch1.selected[3])); + assertTrue(expectedSet.contains(batch1.selected[4])); + } + + @Test + public void testFilterExprOrExprWithSelectInUse() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0); + SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1); + + FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2); + + // Evaluate batch1 so that temporary arrays in the expression + // have residual values to interfere in later computation + 
orExpr.evaluate(batch1); + + // Swap column vectors, but keep selected vector unchanged + ColumnVector tmp = batch1.cols[0]; + batch1.cols[0] = batch1.cols[1]; + batch1.cols[1] = tmp; + // Make sure row-7 is in the output. + batch1.cols[1].isNull[7] = false; + ((LongColumnVector) batch1.cols[1]).vector[7] = 0; + + orExpr.evaluate(batch1); + + assertEquals(3, batch1.size); + Set expectedSet = new HashSet(); + expectedSet.add(0); + expectedSet.add(3); + expectedSet.add(7); + + assertTrue(expectedSet.contains(batch1.selected[0])); + assertTrue(expectedSet.contains(batch1.selected[1])); + assertTrue(expectedSet.contains(batch1.selected[2])); + } + + @Test + public void testFilterExprAndExpr() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0); + SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1); + + FilterExprAndExpr orExpr = new FilterExprAndExpr(expr1, expr2); + + orExpr.evaluate(batch1); + + assertEquals(1, batch1.size); + + assertEquals(2, batch1.selected[0]); + } }