diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index 75983fe..60f4667 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; /** @@ -59,7 +60,7 @@ public Writable getWritableObject(int index) { index = 0; } if (!noNulls && isNull[index]) { - return null; + return NullWritable.get(); } else { writableObj.set(vector[index]); return writableObj; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index bea48cf..3a866ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; /** @@ -59,7 +60,7 @@ public Writable getWritableObject(int index) { index = 0; } if (!noNulls && isNull[index]) { - return null; + return NullWritable.get(); } else { writableObj.set(vector[index]); return writableObj; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java index 704de4b..5d2c9bd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java @@ -26,7 +26,7 @@ public class FilterExprOrExpr extends VectorExpression { VectorExpression childExpr1; VectorExpression childExpr2; - int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE]; + int [] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; @@ -42,15 +42,14 @@ public void evaluate(VectorizedRowBatch batch) { return; } boolean prevSelectInUse = batch.selectedInUse; - //Clone the selected vector + + // Save the original selected vector int [] sel = batch.selected; if (batch.selectedInUse) { - for (int i = 0; i < n; i++) { - tmpSelect1[i] = sel[i]; - } + System.arraycopy(sel, 0, initialSelected, 0, n); } else { for (int i = 0; i < n; i++) { - tmpSelect1[i] = i; + initialSelected[i] = i; sel[i] = i; } batch.selectedInUse = true; @@ -58,40 +57,47 @@ public void evaluate(VectorizedRowBatch batch) { childExpr1.evaluate(batch); - //Calculate unselected ones in last evaluate. - for (int i = 0; i < tmp.length; i++) { + // Calculate unselected ones in last evaluate. + for (int i = 0; i < n; i++) { tmp[i] = 0; } + sel = batch.selected; for (int j = 0; j < batch.size; j++) { int i = sel[j]; tmp[i] = 1; } int unselectedSize = 0; for (int j =0; j < n; j++) { - int i = tmpSelect1[j]; + int i = initialSelected[j]; if (tmp[i] == 0) { unselected[unselectedSize++] = i; } } - //Preserve current selected and size + + // Preserve current selected and size int currentSize = batch.size; int [] currentSelected = batch.selected; - //Evaluate second child expression over unselected ones only. + // Evaluate second child expression over unselected ones only. batch.selected = unselected; batch.size = unselectedSize; + childExpr2.evaluate(batch); - //Merge the result of last evaluate to previous evaluate. + // Merge the result of last evaluate to previous evaluate. int newSize = batch.size + currentSize; - for (int i = batch.size; i < newSize; i++ ) { + for (int i = batch.size; i < newSize; i++) { batch.selected[i] = currentSelected[i-batch.size]; } batch.size = newSize; if (newSize == n) { - //Filter didn't do anything + // Filter didn't do anything batch.selectedInUse = prevSelectInUse; } + + // unselected array is taken away by the row batch + // so take the row batch's original one. + unselected = currentSelected; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java index 6f3b101..71f5c78 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java @@ -25,7 +25,7 @@ */ public class FilterNotExpr extends VectorExpression { VectorExpression childExpr1; - int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE]; + int [] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE]; int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE]; @@ -41,15 +41,13 @@ public void evaluate(VectorizedRowBatch batch) { return; } - //Clone the selected vector + // Clone the selected vector int [] sel = batch.selected; if (batch.selectedInUse) { - for (int i = 0; i < n; i++) { - tmpSelect1[i] = sel[i]; - } + System.arraycopy(sel, 0, initialSelected, 0, n); } else { for (int i = 0; i < n; i++) { - tmpSelect1[i] = i; + initialSelected[i] = i; sel[i] = i; } batch.selectedInUse = true; @@ -57,24 +55,27 @@ public void evaluate(VectorizedRowBatch batch) { childExpr1.evaluate(batch); - //Calculate unselected ones in last evaluate. - for (int i = 0; i < tmp.length; i++) { + // Calculate unselected ones in last evaluate. + for (int i = 0; i < n; i++) { tmp[i] = 0; } + sel = batch.selected; for (int j = 0; j < batch.size; j++) { int i = sel[j]; tmp[i] = 1; } int unselectedSize = 0; for (int j =0; j < n; j++) { - int i = tmpSelect1[j]; + int i = initialSelected[j]; if (tmp[i] == 0) { unselected[unselectedSize++] = i; } } - //The unselected is the new selected + // The unselected is the new selected, swap the arrays + int [] swapTmp = batch.selected; batch.selected = unselected; + unselected = swapTmp; batch.size = unselectedSize; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java index ef64e4b..4d83fd1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java @@ -56,16 +56,14 @@ public void evaluate(VectorizedRowBatch batch) { batch.size = 0; } } else if (batch.selectedInUse) { - int[] newSelected = new int[n]; int newSize = 0; for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 0) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize = 0; for (int i = 0; i != n; i++) { @@ -83,16 +81,14 @@ public void evaluate(VectorizedRowBatch batch) { //Repeating and null value batch.size = 0; } else if (batch.selectedInUse) { - int[] newSelected = new int[n]; int newSize = 0; for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 0 && !nullVector[i]) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize = 0; for (int i = 0; i != n; i++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java index 2d929a8..8af75c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java @@ -53,15 +53,13 @@ public void evaluate(VectorizedRowBatch batch) { //All must be null batch.size = 0; } else if (batch.selectedInUse) { - int [] newSelected = new int[n]; int newSize=0; for(int j=0; j != n; j++) { int i = sel[j]; if (!nullPos[i]) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } - batch.selected = newSelected; batch.size = newSize; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java index 446f071..7e0ffb3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java @@ -51,15 +51,13 @@ public void evaluate(VectorizedRowBatch batch) { //Selection property will not change. return; } else if (batch.selectedInUse) { - int [] newSelected = new int[n]; int newSize=0; for(int j=0; j != n; j++) { int i = sel[j]; if (nullPos[i]) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } - batch.selected = newSelected; batch.size = newSize; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java index b1bce44..75a9b7e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java @@ -53,16 +53,14 @@ public void evaluate(VectorizedRowBatch batch) { batch.size = 0; } } else if (batch.selectedInUse) { - int[] newSelected = new int[n]; int newSize = 0; for (int j = 0; j != n; j++) { int i = sel[j]; if (vector1[i] == 1) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize = 0; for (int i = 0; i != n; i++) { @@ -80,16 +78,14 @@ public void evaluate(VectorizedRowBatch batch) { //Repeating null value batch.size = 0; } else if (batch.selectedInUse) { - int [] newSelected = new int[n]; int newSize=0; for(int j=0; j != n; j++) { int i = sel[j]; if (vector1[i] == 1 && !nullVector[i]) { - newSelected[newSize++] = i; + sel[newSize++] = i; } } batch.size = newSize; - batch.selected = newSelected; } else { int newSize=0; for(int i = 0; i != n; i++) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java index f2e5399..c656ce4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestConstantVectorExpression.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.util.VectorizedRowGroupGenUtil; import org.junit.Test; public class TestConstantVectorExpression { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index d38cc5d..d5078f9 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -19,6 +19,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.HashSet; +import java.util.Set; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -238,4 +242,77 @@ public void testBooleanFiltersOnColumns() { assertEquals(4, batch.selected[4]); assertEquals(5, batch.selected[5]); } + + @Test + public void testFilterNotExpr() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + VectorizedRowBatch batch2 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr = new SelectColumnIsTrue(0); + FilterNotExpr notExpr = new FilterNotExpr(expr); + + notExpr.evaluate(batch1); + + notExpr.evaluate(batch2); + + assertEquals(batch1.size, batch2.size); + for (int j = 0; j < batch1.size; j++) { + assertEquals(batch1.selected[j], batch2.selected[j]); + int i = j; + assertEquals( (((LongColumnVector)batch1.cols[0]).vector[i]), + (((LongColumnVector)batch2.cols[0]).vector[i])); + } + } + + @Test + public void testFilterExprOrExpr() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + VectorizedRowBatch batch2 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0); + SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1); + + FilterExprOrExpr orExpr = new FilterExprOrExpr(expr1, expr2); + + orExpr.evaluate(batch1); + orExpr.evaluate(batch2); + + assertEquals(batch1.size, batch2.size); + for (int j = 0; j < batch1.size; j++) { + assertEquals(batch1.selected[j], batch2.selected[j]); + int i = j; + assertEquals( (((LongColumnVector)batch1.cols[0]).vector[i]), + (((LongColumnVector)batch2.cols[0]).vector[i])); + } + + assertEquals(5, batch1.size); + Set expectedSet = new HashSet(); + expectedSet.add(0); + expectedSet.add(2); + expectedSet.add(3); + expectedSet.add(4); + expectedSet.add(7); + + assertTrue(expectedSet.contains(batch1.selected[0])); + assertTrue(expectedSet.contains(batch1.selected[1])); + assertTrue(expectedSet.contains(batch1.selected[2])); + assertTrue(expectedSet.contains(batch1.selected[3])); + assertTrue(expectedSet.contains(batch1.selected[4])); + } + + @Test + public void testFilterExprAndExpr() { + VectorizedRowBatch batch1 = getBatchThreeBooleanCols(); + + SelectColumnIsTrue expr1 = new SelectColumnIsTrue(0); + SelectColumnIsFalse expr2 = new SelectColumnIsFalse(1); + + FilterExprAndExpr orExpr = new FilterExprAndExpr(expr1, expr2); + + orExpr.evaluate(batch1); + + assertEquals(1, batch1.size); + + assertEquals(2, batch1.selected[0]); + } }