diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java index a2aadca..fd5f5b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java @@ -21,16 +21,20 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This expression evaluates to true if the given input column is not null. + * The boolean output is stored in the specified output column. + */ public class IsNotNull extends VectorExpression { - int colNum; - int outputColumn; + int colNum; + int outputColumn; - public IsNotNull(int colNum, int outputColumn) { - this.colNum = colNum; - this.outputColumn = outputColumn; - } + public IsNotNull(int colNum, int outputColumn) { + this.colNum = colNum; + this.outputColumn = outputColumn; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { @@ -44,38 +48,34 @@ public void evaluate(VectorizedRowBatch batch) { long[] outputVector = ((LongColumnVector) batch.cols[outputColumn]).vector; if (n <= 0) { - //Nothing to do + // Nothing to do return; } - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Selection property will not change. - if (nullPos[0]) { - outputVector[0] = 0; + // output never has nulls for this operator + batch.cols[outputColumn].noNulls = true; + if (inputColVector.noNulls) { + outputVector[0] = 1; + batch.cols[outputColumn].isRepeating = true; + } else if (inputColVector.isRepeating) { + // All must be selected otherwise size would be zero + // Selection property will not change. + outputVector[0] = nullPos[0] ? 
0 : 1; + batch.cols[outputColumn].isRepeating = true; + } else { + batch.cols[outputColumn].isRepeating = false; + if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = nullPos[i] ? 0 : 1; + } } else { - outputVector[0] = 1; + for (int i = 0; i != n; i++) { + outputVector[i] = nullPos[i] ? 0 : 1; + } } - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - if (nullPos[i]) { - outputVector[i] = 0; - } else { - outputVector[i] = 1; - } - } - } - else { - for(int i = 0; i != n; i++) { - if (nullPos[i]) { - outputVector[i] = 0; - } else { - outputVector[i] = 1; - } - } - } - } + } + } @Override public int getOutputColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java index 01f3a9c..a470aae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java @@ -21,16 +21,20 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * This expression evaluates to true if the given input column is null. + * The boolean output is stored in the specified output column. 
+ */ public class IsNull extends VectorExpression { - int colNum; - int outputColumn; + int colNum; + int outputColumn; - public IsNull(int colNum, int outputColumn) { - this.colNum = colNum; - this.outputColumn = outputColumn; - } + public IsNull(int colNum, int outputColumn) { + this.colNum = colNum; + this.outputColumn = outputColumn; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { @@ -39,52 +43,36 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - //Note: if type of isNull could be long[], could we just re-use this - //vector as the output vector. No iterations would be needed. boolean[] nullPos = inputColVector.isNull; int n = batch.size; long[] outputVector = ((LongColumnVector) batch.cols[outputColumn]).vector; if (n <= 0) { - //Nothing to do, this is EOF + // Nothing to do, this is EOF return; } // output never has nulls for this operator batch.cols[outputColumn].noNulls = true; - if (inputColVector.isRepeating && inputColVector.noNulls) { + if (inputColVector.noNulls) { outputVector[0] = 0; batch.cols[outputColumn].isRepeating = true; - } else if (inputColVector.isRepeating && !inputColVector.noNulls) { + } else if (inputColVector.isRepeating) { outputVector[0] = nullPos[0] ? 1 : 0; batch.cols[outputColumn].isRepeating = true; - } else if (!inputColVector.isRepeating && inputColVector.noNulls) { + } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = 0; - } - } - else { - for(int i = 0; i != n; i++) { - outputVector[i] = 0; - } - } - batch.cols[outputColumn].isRepeating = false; - } else /* !inputColVector.isRepeating && !inputColVector.noNulls */ { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = nullPos[i] ? 
1 : 0; } - } - else { - for(int i = 0; i != n; i++) { + } else { + for (int i = 0; i != n; i++) { outputVector[i] = nullPos[i] ? 1 : 0; } } batch.cols[outputColumn].isRepeating = false; } - } + } @Override public int getOutputColumn() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index d60fca4..d310251 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -20,16 +20,19 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +/** + * Evaluates the boolean complement of the input. + */ public class NotCol extends VectorExpression { - int colNum; - int outputColumn; + private final int colNum; + private final int outputColumn; - public NotCol(int colNum, int outputColumn) { - this.colNum = colNum; - this.outputColumn = outputColumn; - } + public NotCol(int colNum, int outputColumn) { + this.colNum = colNum; + this.outputColumn = outputColumn; + } - @Override + @Override public void evaluate(VectorizedRowBatch batch) { if (childExpressions != null) { @@ -44,40 +47,50 @@ public void evaluate(VectorizedRowBatch batch) { long[] outputVector = outV.vector; if (n <= 0) { - //Nothing to do, this is EOF + // Nothing to do, this is EOF return; } - if (inputColVector.isRepeating) { - outV.isRepeating = true; - // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1 - outputVector[0] = ~vector[0] & 1; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ~vector[i] & 1; - } - outV.isRepeating = false; - } - else { - for(int i = 0; i != n; i++) { - outputVector[i] = ~vector[i] & 1; - } - outV.isRepeating = false; - } - - // handle NULLs if (inputColVector.noNulls) { outV.noNulls = true; + if 
(inputColVector.isRepeating) { + outV.isRepeating = true; + // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1 + outputVector[0] = ~vector[0] & 1; + } else if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = ~vector[i] & 1; + } + outV.isRepeating = false; + } else { + for (int i = 0; i != n; i++) { + outputVector[i] = ~vector[i] & 1; + } + outV.isRepeating = false; + } } else { outV.noNulls = false; if (inputColVector.isRepeating) { + outV.isRepeating = true; + outputVector[0] = ~vector[0] & 1; outV.isNull[0] = inputColVector.isNull[0]; + } else if (batch.selectedInUse) { + outV.isRepeating = false; + for (int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = ~vector[i] & 1; + outV.isNull[i] = inputColVector.isNull[i]; + } } else { - System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); + outV.isRepeating = false; + for (int i = 0; i != n; i++) { + outputVector[i] = ~vector[i] & 1; + outV.isNull[i] = inputColVector.isNull[i]; + } } } - } + } @Override public int getOutputColumn() { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index 39d2a3b..2426588 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -188,10 +188,9 @@ public void testIsNullExpr() { // No nulls case, not repeating batch.cols[0].noNulls = true; expr.evaluate(batch); - Assert.assertFalse(outCol.isRepeating); + Assert.assertTrue(outCol.isRepeating); Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); - Assert.assertEquals(0, outCol.vector[4]); // isRepeating, and there are nulls batch = getBatchThreeBooleanCols(); @@ -200,7 +199,6 @@ public void testIsNullExpr() { 
batch.cols[0].isNull[0] = true; expr.evaluate(batch); Assert.assertTrue(outCol.isRepeating); - ; Assert.assertEquals(1, outCol.vector[0]); Assert.assertTrue(outCol.noNulls); @@ -216,6 +214,46 @@ public void testIsNullExpr() { } @Test + public void testIsNotNullExpr() { + // has nulls, not repeating + VectorizedRowBatch batch = getBatchThreeBooleanCols(); + IsNotNull expr = new IsNotNull(0, 2); + LongColumnVector outCol = (LongColumnVector) batch.cols[2]; + expr.evaluate(batch); + Assert.assertEquals(1, outCol.vector[0]); + Assert.assertEquals(0, outCol.vector[4]); + Assert.assertTrue(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // No nulls case, not repeating + batch.cols[0].noNulls = true; + expr.evaluate(batch); + Assert.assertTrue(outCol.isRepeating); + Assert.assertTrue(outCol.noNulls); + Assert.assertEquals(1, outCol.vector[0]); + + // isRepeating, and there are nulls + batch = getBatchThreeBooleanCols(); + outCol = (LongColumnVector) batch.cols[2]; + batch.cols[0].isRepeating = true; + batch.cols[0].isNull[0] = true; + expr.evaluate(batch); + Assert.assertTrue(outCol.isRepeating); + Assert.assertEquals(0, outCol.vector[0]); + Assert.assertTrue(outCol.noNulls); + + // isRepeating, and no nulls + batch = getBatchThreeBooleanCols(); + outCol = (LongColumnVector) batch.cols[2]; + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + Assert.assertTrue(outCol.isRepeating); + Assert.assertTrue(outCol.noNulls); + Assert.assertEquals(1, outCol.vector[0]); + } + + @Test public void testBooleanFiltersOnColumns() { VectorizedRowBatch batch = getBatchThreeBooleanCols(); @@ -233,25 +271,76 @@ public void testBooleanFiltersOnColumns() { assertEquals(0, batch.selected[0]); assertEquals(2, batch.selected[1]); assertEquals(4, batch.selected[2]); + } - batch = getBatchThreeBooleanCols(); - SelectColumnIsNull expr2 = new SelectColumnIsNull(0); - expr2.evaluate(batch); + @Test + public void testSelectColumnIsNull() { + // 
has nulls, not repeating + VectorizedRowBatch batch = getBatchThreeBooleanCols(); + SelectColumnIsNull expr = new SelectColumnIsNull(0); + expr.evaluate(batch); assertEquals(3, batch.size); assertEquals(4, batch.selected[0]); assertEquals(5, batch.selected[1]); assertEquals(8, batch.selected[2]); + // No nulls case, not repeating batch = getBatchThreeBooleanCols(); - SelectColumnIsNotNull expr3 = new SelectColumnIsNotNull(1); - expr3.evaluate(batch); + batch.cols[0].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(0, batch.size); + + // isRepeating, and there are nulls + batch = getBatchThreeBooleanCols(); + batch.cols[0].isRepeating = true; + batch.cols[0].isNull[0] = true; + int initialSize = batch.size; + expr.evaluate(batch); + Assert.assertEquals(initialSize, batch.size); + + // isRepeating, and no nulls + batch = getBatchThreeBooleanCols(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(0, batch.size); + } + + @Test + public void testSelectColumnIsNotNull() { + // has nulls, not repeating + VectorizedRowBatch batch = getBatchThreeBooleanCols(); + SelectColumnIsNotNull expr = new SelectColumnIsNotNull(0); + expr.evaluate(batch); assertEquals(6, batch.size); assertEquals(0, batch.selected[0]); assertEquals(1, batch.selected[1]); assertEquals(2, batch.selected[2]); assertEquals(3, batch.selected[3]); - assertEquals(4, batch.selected[4]); - assertEquals(5, batch.selected[5]); + assertEquals(6, batch.selected[4]); + assertEquals(7, batch.selected[5]); + + // No nulls case, not repeating + batch = getBatchThreeBooleanCols(); + batch.cols[0].noNulls = true; + int initialSize = batch.size; + expr.evaluate(batch); + Assert.assertEquals(initialSize, batch.size); + + // isRepeating, and there are nulls + batch = getBatchThreeBooleanCols(); + batch.cols[0].isRepeating = true; + batch.cols[0].isNull[0] = true; + expr.evaluate(batch); + Assert.assertEquals(0, batch.size); + + // isRepeating, 
and no nulls + batch = getBatchThreeBooleanCols(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + initialSize = batch.size; + expr.evaluate(batch); + Assert.assertEquals(initialSize, batch.size); } @Test