diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt index b5011c3..06f6c44 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,16 +66,11 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt index ae0d348..0d20c78 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; 
DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt index cbec1ab..595870c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,45 +70,84 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = vector[0] value; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector.isRepeating) { - outputVector[0] = vector[0] value; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value; + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector[i] value; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] value; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = vector[i] value; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt index d5aef78..367c950 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,68 +72,94 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, vector[0], value, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - + if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(0, vector[0], value, outputColVector); - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt index fd31672..82fe36b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,25 +66,20 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - + // Handle nulls first NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, * the arithmetic operation is performed even if one or * more inputs are null. 
This is to improve speed by avoiding diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt index 51e6994..09367b9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +69,92 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = vector[0] value ? 
1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector[i] value ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] value ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector[i] value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector[i] value ? 1 : 0; - } + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt index 3e95557..a1bdf65 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,16 +66,11 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff 
--git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt index 2be16cc..c15049a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,16 +67,12 @@ public class extends VectorExpression { DecimalColumnVector inputColVector1 = (DecimalColumnVector) batch.cols[colNum1]; DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; int[] sel = batch.selected; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] @@ -103,10 +105,12 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[0], vector2[i], outputColVector); } } else { for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. 
DecimalUtil.Checked(i, vector1[0], vector2[i], outputColVector); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt index 159a61e..d6936c6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,50 +70,92 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (value == 0) { // Denominator is zero, convert the batch to nulls outputColVector.noNulls = false; outputColVector.isRepeating = true; outputIsNull[0] = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; } else if (inputColVector.isRepeating) { - outputVector[0] = vector[0] value; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = vector[0] value; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value; + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector[i] value; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] value; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ if (outputColVector.noNulls) { + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + } + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { + for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = vector[i] value; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt index 2631468..1d7603e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,28 +72,12 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int 
n = batch.size; + HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (value.compareTo(HiveDecimal.ZERO) == 0) { @@ -93,45 +85,90 @@ public class extends VectorExpression { outputColVector.noNulls = false; outputColVector.isRepeating = true; outputIsNull[0] = true; + return; } else if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, vector[0], value, outputColVector); + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, vector[0], value, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - // copy isNull entries first because operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt index 811f6db..1f5fc86 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -44,6 +47,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -53,49 +62,84 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = 
inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = ( vector[0]); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = ( vector[0]); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ( vector[i]); + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = ( vector[i]); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = ( vector[i]); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ( vector[i]); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = ( vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = ( vector[i]); + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { + for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = ( vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = ( vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ( vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index f0ab471..9125d7e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -47,6 +50,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -56,49 +65,86 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = - vector[0]; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = - vector[0]; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = -vector[i]; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = -vector[i]; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = -vector[i]; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = -vector[i]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = -vector[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = -vector[i]; + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + if (outputColVector.noNulls) { + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + } + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = -vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = -vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = -vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; + } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt index 5db9a0b..5b180b3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,16 +81,11 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt index bf4b24c..6dfc675 100644 --- 
ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,59 +82,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.( + scratchDate1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.( - scratchDate1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt index 847ebac..7957b63 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { 
super.evaluateChildren(batch); } @@ -74,13 +81,8 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + long[] vector1 = inputColVector1.vector; outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt index 180bebc..a51e677 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,58 +81,107 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt index 4f12315..2de7f0c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Date; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class extends 
VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,59 +96,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... 
+ if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt index a6fa2ac..52685ab 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -75,6 +76,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + 
return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,52 +95,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt index 30b03ba..c3f8022 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,17 +68,12 @@ public class extends VectorExpression { Decimal64ColumnVector inputColVector2 = (Decimal64ColumnVector) batch.cols[colNum2]; Decimal64ColumnVector outputColVector = (Decimal64ColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = 
inputColVector2.vector; long[] outputVector = outputColVector.vector; boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt index 81dcf33..866cd51 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,95 +72,133 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* - * Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[0] value; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[0] value; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... 
+ if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } } } else { - for(int i = 0; i != n; i++) { - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } else { + for(int i = 0; i != n; i++) { + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. 
- final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. 
} @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt index dc6ccb9..c768e89 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -56,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,95 +73,131 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[0]; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[0]; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } else { - for(int i = 0; i != n; i++) { - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for(int i = 0; i != n; i++) { + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. 
} @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt index 1ab5228..af27ddc 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -27,7 +30,6 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import java.util.Arrays; public class extends VectorExpression { private static final long serialVersionUID = 1L; @@ -49,6 +51,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -58,54 +66,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; - if (inputColVector.isRepeating) { + HiveDecimalWritable[] vector = inputColVector.vector; - // All must be selected otherwise size would be zero - // Repeating property will not change. - outputIsNull[0] = inputIsNull[0]; - (0, vector[0], outputColVector); + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + (0, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; + return; + } - // Set isNull because decimal operation can yield a null. - outputIsNull[i] = false; - (i, vector[i], outputColVector); + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } } } else { - - // Set isNull because decimal operation can yield a null. - Arrays.fill(outputIsNull, 0, n, false); - for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. 
+ (i, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 36ad892..4f4d4fa 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -57,28 +57,27 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. + if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -105,10 +104,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((vector[0] < leftValue || vector[0] > rightValue)) { // Entire batch is filtered out. 
@@ -121,7 +117,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } @@ -132,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt index 150d341..dd5f3ee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // filter rows with NULL on left input int newSize; newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); @@ -88,9 +89,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. 
if (!(vector1[0] vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt index a9ddeca..c955c06 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt @@ -51,25 +51,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(vector[0] value)) { //Entire batch is filtered out. batch.size = 0; @@ -97,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(vector[0] value)) { //Entire batch is filtered out. 
batch.size = 0; @@ -111,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } @@ -122,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt index 7c41f3e..f65d96f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt @@ -60,26 +60,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -108,10 +106,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -124,7 +119,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } @@ -136,7 +131,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt index 6a82183..77fe7ae 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && 
inputColVector2.isRepeating) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt index 80a19d9..078b132 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. 
@@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt index 4b7e849..20dbaba 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. @@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt index f741409..4afed54 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,13 +71,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector1 = inputColVector1.vector; // filter rows with NULL on left input int newSize; @@ -90,9 +91,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected 
otherwise size would be zero - // Repeating property will not change. if (!(vector1[0] inputColVector2.(0))) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt index 8ece14f..8f8104d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -57,24 +57,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value inputColVector.(0))) { //Entire batch is filtered out. batch.size = 0; @@ -102,9 +100,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value inputColVector.(0))) { //Entire batch is filtered out. 
batch.size = 0; @@ -116,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } @@ -127,7 +123,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt index 18840f1..28b5704 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt @@ -52,25 +52,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value vector[0])) { //Entire batch is filtered out. batch.size = 0; @@ -98,9 +97,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value vector[0])) { //Entire batch is filtered out. 
batch.size = 0; @@ -112,7 +109,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } @@ -123,7 +120,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index b9a332a..66df0ab 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -56,27 +56,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -108,9 +107,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -124,7 +121,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; @@ -137,7 +134,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt index 0f0cb2e..1455dc7 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,19 +67,14 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = 
inputColVector1.start; int[] start2 = inputColVector2.start; int[] length1 = inputColVector1.length; int[] length2 = inputColVector2.length; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt index a85a889..155d7ec 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt @@ -52,27 +52,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. @@ -101,9 +100,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. @@ -116,7 +113,7 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } @@ -128,7 +125,7 @@ public abstract class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt index f3d1e58..8ad2bb1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt @@ -53,27 +53,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. @@ -102,9 +101,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. @@ -117,7 +114,7 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } @@ -129,7 +126,7 @@ public abstract class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt index 53bf271..604060a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -59,25 +59,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if 
(inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. @@ -106,10 +103,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. @@ -122,7 +116,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } @@ -134,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt index eaa58c7..f9bc9ee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,13 +68,8 @@ public class extends VectorExpression { int[] sel = 
batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector2 = inputColVector2.vector; // filter rows with NULL on left input int newSize; @@ -87,9 +88,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(inputColVector1.(0) vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt index 2e38269..fc1be95 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -53,24 +53,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector.(0) value)) { //Entire batch is filtered out. 
batch.size = 0; @@ -98,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector.(0) value)) { //Entire batch is filtered out. batch.size = 0; @@ -112,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } @@ -123,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt index 697e3ef..0a541f9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -57,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,12 +76,6 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt 
ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index 435316d..68e0006 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,18 +70,10 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector1.isNull; if (inputColVector1.noNulls) { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. batch.size = 0; @@ -103,9 +101,7 @@ public class extends VectorExpression { } } else { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. 
batch.size = 0; @@ -117,7 +113,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } @@ -128,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index 4887ad2..d5952de 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -56,6 +56,13 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -63,18 +70,10 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector2.isNull; if (inputColVector2.noNulls) { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. @@ -103,9 +102,7 @@ public class extends VectorExpression { } } else { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. @@ -118,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } @@ -130,7 +127,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index cc86a3e..e071205 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -58,27 +58,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -110,9 +109,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -126,7 +123,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; @@ -139,7 +136,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 52f1d9e..eb4892d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,44 +74,44 @@ public class extends VectorExpression { outputColVector = () 
batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector2 = arg2ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { - outputColVector.fill(arg3Scalar); + outputVector[0] = arg3Scalar; + outputColVector.isRepeating = true; } return; } - + // Extend any repeating values and noNulls indicator in the inputs to // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? 
arg2ColVector.isNull[i] : false); } } } else /* there are nulls */ { diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt index 1693e8f..0309b23 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,21 +74,18 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector3 = arg3ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + outputVector[0] = arg2Scalar; + outputColVector.isRepeating = true; } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } @@ -95,18 +98,25 @@ public class extends VectorExpression { // for when arg3ColVector is repeating or has no nulls. arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? 
arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt index ebdfe47..5698b71 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,48 +74,96 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputVector[0] = arg2Scalar; } else { - outputColVector.fill(arg3Scalar); + outputVector[0] = arg3Scalar; } - } else if (arg1ColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + outputColVector.isRepeating = true; + return; + } + + if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } } } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
- arg2Scalar : arg3Scalar); - outputIsNull[i] = false; + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2Scalar : arg3Scalar); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2Scalar : arg3Scalar); + } } - Arrays.fill(outputIsNull, 0, n, false); } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt index 9767973..b5811a1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,16 +81,11 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt index ca5829c..6190e2c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,59 +82,109 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt index d6e45ac..1c3ea44 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,15 +79,9 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) 
batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt index 6e232e7..489be86 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,59 +80,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt index 041a651..067b872 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class 
extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,59 +96,109 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchDate2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchDate2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls 
flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt index f2d4eaf..d2cdc9b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -72,6 +73,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + 
return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -85,52 +92,101 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + dtm.( + value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = inputIsNull[i]; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = inputIsNull[i]; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt index bd2cbac..2d56625 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) 
batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt index 889c445..5e8e14b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -54,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,62 +69,92 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] 
vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputVector[0] = vector1[0] value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector1[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector1[i] value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector1[i] value ? 1 : 0; - } - } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector1[0] value ? 
1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector1[i] value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] value ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = vector1[i] value ? 1 : 0; - outNulls[i] = false; + outputIsNull[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = vector1[i] value ? 1 : 0; + outputIsNull[i] = false; + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt index 4d79283..29fe443 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,88 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; - long[] outputVector = outputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + long[] outputVector = outputColVector.vector; + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value inputColVector.(0) ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value inputColVector.(i) ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value inputColVector.(i) ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 
1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = value inputColVector.(i) ? 1 : 0; + outputIsNull[i] = false; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index e95baa6..bb219c5 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,45 +82,83 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - 
outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - outputVector[0] = value vector[0]; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value vector[0]; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i]; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i]; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value vector[i]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value vector[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i]; + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = value vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = value vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = value vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt index 3ffca6c..4acc975 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,60 +72,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column 
reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n); return; } - + if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(0, value, vector[0], outputColVector); - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt index 9f4ec50..7865e8a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +69,90 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value vector[0] ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i] ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value vector[i] ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value vector[0] ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i] ? 
1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value vector[i] ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = value vector[i] ? 1 : 0; - } + outputIsNull[i] = false; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt index aa33354..72e6a7b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,56 +82,90 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = 
inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - boolean hasDivBy0 = false; if (inputColVector.isRepeating) { - denom = vector[0]; - outputVector[0] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + denom = vector[0]; + outputVector[0] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } else { - for(int i = 0; i != n; i++) { - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + for(int i = 0; i != n; i++) { + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt index 650101c..b09a129 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,67 +72,96 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } if (inputColVector.noNulls) { - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - - if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, value, vector[0], outputColVector); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } } else /* there are nulls */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because the operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + // The following may override a "false" null setting if an error or overflow occurs. + outputIsNull[i] = false; + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - - // copy isNull entries first because the operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + // The following may override a "false" null setting if an error or overflow occurs. + outputIsNull[i] = false; + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result anyway. 
- */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt index 1b1db54..c4e691c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -53,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,7 +72,6 @@ public class extends VectorExpression { boolean[] nullPos2 = inputColVector2.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = inputColVector1.start; @@ -74,19 +80,13 @@ public class extends VectorExpression { int[] length2 = inputColVector2.length; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.noNulls = true; + + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = true; if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; ret = (vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]); if (ret) { @@ -94,7 +94,24 @@ public class extends VectorExpression { } else { outVector[0] = 0; } - } else if (inputColVector1.isRepeating) { + outNull[0] = false; + outputColVector.isRepeating = true; + return; + } + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * isNull entries when selectedInUse. */ + // UNDONE: Move below repeated code... + NullUtil.initOutputNullsToFalse(outputColVector, /* isRepeating */ false, + batch.selectedInUse, sel, n); + } + + if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -159,10 +176,8 @@ public class extends VectorExpression { // handle case where only input 2 has nulls } else if (inputColVector1.noNulls) { - outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos2[0]; if (!nullPos2[0]) { if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { @@ -170,14 +185,32 @@ public class extends VectorExpression { } else { outVector[0] = 0; } - } - } else if (inputColVector1.isRepeating) { + outNull[0] = false; + } else { + outNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * isNull entries when selectedInUse. */ + NullUtil.initOutputNullsToFalse(outputColVector, /* isRepeating */ false, + batch.selectedInUse, sel, n); + } + + if (inputColVector1.isRepeating) { // no need to check for nulls in input 1 if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos2[i]; if (!nullPos2[i]) { if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { @@ -185,8 +218,12 @@ public class extends VectorExpression { } else { outVector[i] = 0; } - } + } else { + outNull[i] = true; + outputColVector.noNulls = false; + } } + return; } else { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; @@ -203,8 +240,9 @@ public class extends VectorExpression { } else if (inputColVector2.isRepeating) { if (nullPos2[0]) { // Entire output vector will be null - outputColVector.isRepeating = true; outNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; return; } if (batch.selectedInUse) { @@ -233,7 +271,6 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos2[i]; if (!nullPos2[i]) { if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { @@ -241,7 +278,10 @@ public class extends VectorExpression { } else { outVector[i] = 0; } - } + } else { + outNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { @@ -260,10 +300,7 @@ public class extends VectorExpression { // handle case where only input 1 has nulls } else if (inputColVector2.noNulls) { - outputColVector.noNulls = false; if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos1[0]; if (!nullPos1[0]) { if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { @@ -271,18 +308,38 @@ public class extends
VectorExpression { } else { outVector[0] = 0; } - } - } else if (inputColVector1.isRepeating) { + outNull[0] = false; + } else { + outNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * isNull entries when selectedInUse. */ + NullUtil.initOutputNullsToFalse(outputColVector, /* isRepeating */ false, + batch.selectedInUse, sel, n); + } + + if (inputColVector1.isRepeating) { if (nullPos1[0]) { // Entire output vector will be null - outputColVector.isRepeating = true; outNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; return; } + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = false; if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; @@ -292,7 +349,6 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - outNull[i] = false; if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; @@ -305,7 +361,6 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i]; if (!nullPos1[i]) { if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { @@ -313,11 +368,13 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; if (!nullPos1[i]) { if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { @@ -325,6 +382,9 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + 
outputColVector.noNulls = false; } } } @@ -332,7 +392,6 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i]; if (!nullPos1[i]) { if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { @@ -340,11 +399,13 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; if (!nullPos1[i]) { if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { @@ -352,6 +413,9 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } @@ -361,26 +425,44 @@ public class extends VectorExpression { } else { outputColVector.noNulls = false; if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos1[0] || nullPos2[0]; - if (!outNull[0]) { + if (!nullPos1[0] && !nullPos2[0]) { if ((vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0])) { + vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { outVector[0] = 0; } - } - } else if (inputColVector1.isRepeating) { + outNull[0] = false; + } else { + outNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * isNull entries when selectedInUse. */ + // UNDONE: Move below repeated code. 
+ NullUtil.initOutputNullsToFalse(outputColVector, /* isRepeating */ false, + batch.selectedInUse, sel, n); + } + + if (inputColVector1.isRepeating) { if (nullPos1[0]) { outputColVector.isRepeating = true; outNull[0] = true; + outputColVector.noNulls = false; return; } if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos2[i]; if (!nullPos2[i]) { if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { @@ -388,11 +470,13 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos2[i]; if (!nullPos2[i]) { if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { @@ -400,6 +484,9 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } @@ -407,12 +494,12 @@ public class extends VectorExpression { if (nullPos2[0]) { outputColVector.isRepeating = true; outNull[0] = true; + outputColVector.noNulls = false; return; } if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i]; if (!nullPos1[i]) { if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { @@ -420,11 +507,13 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; if (!nullPos1[i]) { if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { @@ -432,6 +521,9 @@ public class extends VectorExpression { } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } @@ -439,26 +531,30 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos1[i] || 
nullPos2[i]; - if (!outNull[i]) { + if (!nullPos1[i] && !nullPos2[i]) { if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i] || nullPos2[i]; - if (!outNull[i]) { + if (!nullPos1[i] && !nullPos2[i]) { if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { outVector[i] = 0; } + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt index ca55834..e279822 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,37 +54,58 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] 
nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = false; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; if ((vector[0], start[0], length[0], value, 0, value.length)) { outVector[0] = 1; } else { outVector[0] = 0; } - } else if (batch.selectedInUse) { + outputColVector.isRepeating = true; + return; + } + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * isNull entries when selectedInUse. 
*/ + NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, + batch.selectedInUse, sel, n); + } + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; if ((vector[i], start[i], length[i], value, 0, value.length)) { @@ -102,38 +124,51 @@ public abstract class extends VectorExpression { } } } else { - outputColVector.noNulls = false; + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos[0]; - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((vector[0], start[0], length[0], value, 0, value.length)) { outVector[0] = 1; } else { outVector[0] = 0; } - } - } else if (batch.selectedInUse) { + outNull[0] = false; + } else { + outNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { outVector[i] = 1; } else { outVector[i] = 0; } + outNull[i] = false; + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + outNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { outVector[i] = 1; } else { outVector[i] = 0; } + outNull[i] = false; + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt index ecb4d2a..ea371fc 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -22,6 +22,7 @@ import 
java.nio.charset.StandardCharsets; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -54,36 +55,55 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; long[] outVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; if ((value, 0, value.length, vector[0], start[0], length[0])) { outVector[0] = 1; } else { outVector[0] = 0; } - } else if (batch.selectedInUse) { + outNull[0] = false; + outputColVector.isRepeating = true; + return; + } + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * isNull entries when selectedInUse. */ + NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, + batch.selectedInUse, sel, n); + } + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; if ((value, 0, value.length, vector[i], start[i], length[i])) { @@ -102,38 +122,49 @@ public abstract class extends VectorExpression { } } } else { - outputColVector.noNulls = false; if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos[0]; - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((value, 0, value.length, vector[0], start[0], length[0])) { outVector[0] = 1; } else { outVector[0] = 0; } - } - } else if (batch.selectedInUse) { + outNull[0] = false; + } else { + outNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { outVector[i] = 1; } else { outVector[i] = 0; } + outNull[i] = false; + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } else { for(int i = 0; i != n; i++) { - outNull[i] = nullPos[i]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { outVector[i] = 1; } else { outVector[i] = 0; } + outNull[i] = false; + } else { + outNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt index a27da10..dcc2c22 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ 
-60,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,15 +80,9 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt index 9f708e2..6a35bf4 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,52 +81,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not 
need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt index b3d9a4b..526a52d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,15 +79,9 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - 
outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt index e49f614..e686bde 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,52 +78,98 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + dtm.( + inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt index 95e7271..e66d7aa 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -58,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,12 +78,6 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } outputColVector.isRepeating = 
inputColVector1.isRepeating && inputColVector2.isRepeating diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt index 6baa72a..cfa727f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,52 +79,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt index 54a1a37..1627f61 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,15 +66,9 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; [] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - 
return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt index 3bb95dd..b6b3a40 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,88 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = inputColVector.(0) value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.(i) value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.(i) value ? 1 : 0; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- if (!nullPos[0]) { - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt index 3db5d01..ebfabce 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -55,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,14 +73,8 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - 
outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 1ee7b11..5adcfd3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,61 +73,88 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 
1 : 0; + } } } } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt index 509f264..12a0195 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -74,6 +75,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -87,59 +94,103 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) 
vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt index 2de3044..0f755d1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -73,6 +74,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -86,59 +93,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt index 4ed80d1..a271cf9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -71,6 +72,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) 
{ + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -84,53 +91,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 6cca0bb..f54a47b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if 
batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,61 +75,90 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector2.noNulls; if (inputColVector2.noNulls) { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; - } - } - } else { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 8264e8a..dfa79e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2936,6 +2936,20 @@ private VectorExpression getWhenExpression(List childExpr, childExpr.subList(2, childExpr.size())); } + if (isNullConst(thenDesc) && isNullConst(elseDesc)) { + + // THEN NULL ELSE NULL: An unusual "case", but possible. + final int outputColumnNum = ocm.allocateOutputColumn(returnType); + + final VectorExpression resultExpr = + new IfExprNullNull( + outputColumnNum); + + resultExpr.setOutputTypeInfo(returnType); + resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + return resultExpr; + } if (isNullConst(thenDesc)) { final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 66de847..44b7c95 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -97,19 +97,6 @@ public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) { } /** - * Iterates thru all the column vectors and sets noNull to - * specified value. - * - * @param batch - * Batch on which noNull is set - */ - public static void setNoNullFields(VectorizedRowBatch batch) { - for (int i = 0; i < batch.numCols; i++) { - batch.cols[i].noNulls = true; - } - } - - /** * Iterates thru all the column vectors and sets repeating to * specified column. 
* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java index c15bdc1..cc1d4f0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,7 +60,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -68,38 +69,54 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setDays(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; + setDays(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind.
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } } - outputColVector.isRepeating = false; } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDays(outputColVector, vector, i); + // Set isNull before call in case it changes its mind. outputIsNull[i] = inputIsNull[i]; + setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index a2e4a52..98a71ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -75,57 +77,77 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; convert(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; convert(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { convert(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; if (!inV.isNull[0]) { + // Set isNull before call in case it changes it mind. + outV.isNull[0] = false; convert(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; convert(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outV.isNull[i] = false; convert(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java index aa529ed..7ad0493 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -64,6 +64,7 @@ protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { outV.noNulls = false; return; } + outV.isNull[i] = false; switch (integerPrimitiveCategory) { case BYTE: outV.vector[i] = decWritable.byteValue(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java index 08abf27..8cd5c99 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] vector = inputColVector.vector; @@ -66,19 +67,24 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- setDouble(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + setDouble(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; setDouble(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setDouble(outputColVector, vector, i); } @@ -88,14 +94,26 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDouble(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - setDouble(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } outputColVector.isRepeating = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java index df25eac..348c528 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -65,39 +66,57 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setSeconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false; + setSeconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; setSeconds(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setSeconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setSeconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind.
+ System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + outputColVector.noNulls = false; for(int i = 0; i != n; i++) { - setSeconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + setSeconds(outputColVector, vector, i); + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java index 42c34c8..274caf2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -54,7 +56,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -63,39 +64,61 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setMilliseconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; outputColVector.isRepeating = true; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind.
+ outputIsNull[0] = false; + setMilliseconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; setMilliseconds(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setMilliseconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setMilliseconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setMilliseconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 34269da..c9a1d61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -28,6 +28,7 @@ import org.apache.hive.common.util.DateParser; import java.nio.charset.StandardCharsets; +import java.util.Arrays; /** * Casts a string vector to a date vector. @@ -64,28 +65,42 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } } else { @@ -94,27 +109,37 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { + if (!inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outV.isNull[0] = false; evaluate(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; evaluate(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outV.isNull[i] = false; evaluate(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 41443c5..a519934 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -81,24 +83,39 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } @@ -106,21 +123,26 @@ public void evaluate(VectorizedRowBatch batch) { } } else { - // Handle case with nulls. 
Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; if (!inV.isNull[0]) { + // Set isNull before call in case it changes it mind. + outV.isNull[0] = false; func(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; @@ -128,7 +150,12 @@ public void evaluate(VectorizedRowBatch batch) { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java index 3ea1e8c..2f018d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -85,24 +87,40 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] 
outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } @@ -110,21 +128,26 @@ public void evaluate(VectorizedRowBatch batch) { } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; if (!inV.isNull[0]) { + // Set isNull before call in case it changes its mind. + outV.isNull[0] = false; func(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; @@ -132,7 +155,12 @@ public void evaluate(VectorizedRowBatch batch) { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind.
+ outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index feb0ab6..06bfd1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -60,57 +62,73 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind.
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; if (!inV.isNull[0]) { + // Set isNull before call in case it changes its mind. + outV.isNull[0] = false; evaluate(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { evaluate(outV, inV, i); } } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { evaluate(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java index 09dd4d9..9a50186 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -58,57 +60,80 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull
= outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { + if (!inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); + } else { + outputIsNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; evaluate(outV, inV, i); + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java index a6cfee8..36308b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -183,57 +185,78 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. 
Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { + if (!inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outV.isNull[0] = false; func(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java index 1231cda..64f09e7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls =
inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -65,39 +66,47 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = toBool(inputColVector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = toBool(inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = toBool(inputColVector, i); } } else { + // Set isNull before calls in case they change their mind.
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } } - outputColVector.isRepeating = false; } else /* there are nulls */ { + // The input isNull flags are copied below, so the output must not keep a stale noNulls == true. + outputColVector.noNulls = false; if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); outputIsNull[i] = inputIsNull[i]; + outputVector[i] = toBool(inputColVector, i); } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java index e696455..d682f4f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -51,7 +53,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] outputVector = outputColVector.vector; @@ -60,39 +61,55 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change.
- outputVector[0] = inputColVector.getDouble(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getDouble(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inputColVector.getDouble(i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getDouble(i); } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getDouble(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java index 36b9f13..cf8125a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -18,6 +18,8 @@ package 
org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -48,7 +50,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -57,20 +58,28 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getTimestampAsLong(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getTimestampAsLong(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inputColVector.getTimestampAsLong(i); } } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getTimestampAsLong(i); } @@ -80,14 +89,24 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getTimestampAsLong(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } outputColVector.isRepeating = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 127e431..57838a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -66,104 +68,127 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output.
+ outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] & vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { 
outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -171,49 +196,46 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; 
outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } - outV.isRepeating = false; } outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ @@ -223,7 +245,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -231,32 +253,31 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - 
outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } @@ -267,21 +288,19 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 3542a07..d3a13c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -69,104 +71,126 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] | vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... 
+ + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j 
= 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -174,51 +198,47 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = 
inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } - outV.isRepeating = false; } - outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ // either input 1 or input 2 may have nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -226,7 +246,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. 
outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -234,57 +254,53 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } - outV.isRepeating = false; } else /* 
neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index c7cab2a..d3a8936 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -124,67 +124,73 @@ public ConstantVectorExpression(int outputColumnNum, TypeInfo outputTypeInfo, bo private void evaluateLong(VectorizedRowBatch vrg) { LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { cv.vector[0] = longValue; + cv.isNull[0] = false; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDouble(VectorizedRowBatch vrg) { DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { cv.vector[0] = doubleValue; + cv.isNull[0] = false; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateBytes(VectorizedRowBatch vrg) { BytesColumnVector cv = 
(BytesColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { cv.setVal(0, bytesValue, 0, bytesValueLength); + cv.isNull[0] = false; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDecimal(VectorizedRowBatch vrg) { DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { dcv.vector[0].set(decimalValue); + dcv.isNull[0] = false; } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateTimestamp(VectorizedRowBatch vrg) { - TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; + TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; + tcv.isRepeating = true; if (!isNullValue) { - dcv.set(0, timestampValue); + tcv.set(0, timestampValue); + tcv.isNull[0] = false; } else { - dcv.isNull[0] = true; + tcv.isNull[0] = true; + tcv.noNulls = false; } } private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { dcv.set(0, intervalDayTimeValue); + dcv.isNull[0] = false; } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java index 2699681..b73a296 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -83,21 +84,30 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } } else if (inputColVector1.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); @@ -105,6 +115,15 @@ public void evaluate(VectorizedRowBatch batch) { } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -114,12 +133,12 @@ public void evaluate(VectorizedRowBatch batch) { outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java index 946b738..b8d791d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -86,21 +87,30 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } } else if (inputColVector2.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); @@ -108,6 +118,15 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { /* there are nulls */ + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java index 9a8177c..e9100cd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java @@ -80,8 +80,8 @@ public void evaluate(VectorizedRowBatch batch) { DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColumnVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColumnVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; HiveDecimalWritable[] vector = inputColumnVector.vector; long[] outputVector = outputColVector.vector; @@ -91,49 +91,62 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColumnVector.noNulls; + if (inputColumnVector.noNulls) { if (inputColumnVector.isRepeating) { // All must be selected otherwise size would be zero // Repeating property will not change. outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; + outputIsNull[0] = false; outputColVector.isRepeating = true; } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } else { + + // Carefully handle NULLs... 
+ + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ + outputColVector.noNulls = false; + if (inputColumnVector.isRepeating) { //All must be selected otherwise size would be zero //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; - outNulls[0] = false; + outputIsNull[0] = false; } else { - outNulls[0] = true; + outputIsNull[0] = true; } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(vector[i]) ? 
1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java index 791d8f2..9976e4a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,33 +60,52 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; outV.initBuffer(); + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { //Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
+ + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; @@ -99,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +127,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java index ba83b6a..c4d72d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java @@ -65,8 +65,8 @@ public void evaluate(VectorizedRowBatch batch) { DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; double[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -76,49 +76,62 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + if (inputColVector.noNulls) { if (inputColVector.isRepeating) { // All must be selected otherwise size would be zero // Repeating property will not change. 
outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; + outputIsNull[0] = false; outputColVector.isRepeating = true; } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } } else { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ + outputColVector.noNulls = false; + if (inputColVector.isRepeating) { // All must be selected otherwise size would be zero // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outNulls[0] = false; + outputIsNull[0] = false; } else { - outNulls[0] = true; + outputIsNull[0] = true; } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 
1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java index c8b1dad..10fbc0b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java @@ -65,8 +65,15 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inputColVector.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.isRepeating) { outV.isRepeating = true; func(outV, vector, 0); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java index 28d800e..4d0a649 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,34 +59,52 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore.
+ */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; @@ -99,7 +119,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +126,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java index 5fb9778..bcba6bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -70,34 +72,48 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; @@ -112,7 +128,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -120,7 +135,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java index f518f39..b0160ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,34 +60,48 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java index e632ff9..4ec56fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,34 +59,52 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing 
to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore.
+ */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java index d500612..248878e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,34 +59,48 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
+ + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java index f93dbfc..edf6b8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -73,32 +74,51 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; outV.initBuffer(); + boolean[] outputIsNull = outV.isNull; + if (n == 0) { //Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inputColVector.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; prepareResult(0, vector, outV); } else if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; prepareResult(i, vector, outV); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { prepareResult(i, vector, outV); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inputColVector.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inputColVector.isNull[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java index 1a94408..bdbf4d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java @@ -55,9 +55,12 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; double[] outputVector = outputColVector.vector; - outputColVector.noNulls = true; outputColVector.isRepeating = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + // return immediately if batch is empty if (n == 0) { return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java index d289dff..96ce35d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java @@ -55,9 +55,12 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; double[] outputVector = outputColVector.vector; - outputColVector.noNulls = true; outputColVector.isRepeating = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // return immediately if batch is empty if (n == 0) { return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java index ff8593e..5c52d5b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java @@ -68,12 +68,17 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outputIsNull[0] = inputIsNull[0]; - round(0, vector[0], decimalPlaces, outputColVector); + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + round(0, vector[0], decimalPlaces, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { @@ -92,8 +97,16 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -106,7 +119,6 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java index d474ff0..7e0ff9e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -55,32 +57,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputCol]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) 
{ //Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, inV, i); } outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { func(outV, inV, i); } outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore.
+ */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java index 93cf1ec..752e097 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,34 +60,47 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
+ + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; @@ -100,7 +115,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -108,7 +122,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java index 9eb4312..6737443 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -59,34 +61,47 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.noNulls) { - outV.noNulls = true; if (inV.isRepeating) { outV.isRepeating = true; + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + */ outV.noNulls = false; + if (inV.isRepeating) { outV.isRepeating = true; outV.isNull[0] = inV.isNull[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java index f9b3f76..c59a0cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java @@ -64,39 +64,67 @@ public void evaluate(VectorizedRowBatch batch) { return; } - arg2ColVector.flatten(batch.selectedInUse, sel, n); + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (arg1ColVector.isRepeating) { - if (!null1[0] && vector1[0] == 1) { - outputColVector.setElement(0, 0, arg2ColVector); + if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { + outputColVector.isRepeating = true; outputColVector.noNulls = false; isNull[0] = true; } return; } - if (batch.selectedInUse) { - for (int j = 0; j < n; j++) { - int i = sel[j]; - if (!null1[0] && vector1[i] == 1) { - outputColVector.setElement(i, i, arg2ColVector); - } else { - outputColVector.noNulls = false; - isNull[i] = true; + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } } } } else { - for (int i = 0; i < n; i++) { - if (!null1[0] && vector1[i] == 1) { - outputColVector.setElement(i, i, arg2ColVector); - } else { - outputColVector.noNulls = false; - isNull[i] = true; + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for (int i = 0; i < n; i++) { + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } } } } - - arg2ColVector.unFlatten(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index e7d4e4d..5ffa3d4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -65,8 +65,10 @@ public void evaluate(VectorizedRowBatch batch) { DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; double[] vector2 = arg2ColVector.vector; @@ -85,7 +87,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -93,6 +95,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index fa7b2da..9a8ba72 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -65,8 +65,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -82,7 +84,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -90,6 +92,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java index 487fb97..7904f57 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java @@ -67,8 +67,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -78,14 +80,20 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { - outputColVector.fill(arg3Scalar); + outputColVector.set(0, arg3Scalar); + outputColVector.isRepeating = true; } return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 + } + // Extend any repeating values and noNulls indicator in the inputs to // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java index 7b18cf8..3ced4e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java @@ -67,8 +67,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -78,14 +80,20 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.set(0, arg2Scalar); + outputColVector.isRepeating = true; } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector + } + // Extend any repeating values and noNulls indicator in the inputs to // reduce the number of code paths needed below. // This could be optimized in the future by having separate paths diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java index 0ba6722..fb57c8a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java @@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -79,12 +81,16 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.set(0, arg2Scalar); } else { - outputColVector.fill(arg3Scalar); + outputColVector.set(0, arg3Scalar); } - } else if (arg1ColVector.noNulls) { + outputColVector.isRepeating = true; + return; + } + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 0c8a2f6..7ea3046 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -64,8 +64,10 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; long[] vector2 = arg2ColVector.vector; @@ -92,6 +94,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java index 85c37f9..a19f033 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java @@ -54,6 +54,9 @@ public void evaluate(VectorizedRowBatch batch) { final ColumnVector arg2ColVector = batch.cols[arg2Column]; final ColumnVector outputColVector = batch.cols[outputColumnNum]; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + final int[] sel = batch.selected; final int n = batch.size; final boolean[] null1 = arg1ColVector.isNull; @@ -64,39 +67,63 @@ public void evaluate(VectorizedRowBatch batch) { return; } - arg2ColVector.flatten(batch.selectedInUse, sel, n); + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (arg1ColVector.isRepeating) { - if (!null1[0] && vector1[0] == 1) { + if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) { + outputColVector.isRepeating = true; outputColVector.noNulls = false; isNull[0] = true; } else { - outputColVector.setElement(0, 0, arg2ColVector); + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } return; } - if (batch.selectedInUse) { - for (int j = 0; j < n; j++) { - int i = sel[j]; - if (!null1[0] && vector1[i] == 1) { - outputColVector.noNulls = false; - isNull[i] = true; - } else { - outputColVector.setElement(i, i, arg2ColVector); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.noNulls = false; + isNull[i] = true; + } else { + outputColVector.setElement(i, i, arg2ColVector); + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector1[i] == 1) { + outputColVector.noNulls = false; + isNull[i] = true; + } else { + outputColVector.setElement(i, i, arg2ColVector); + } } } } else { - for (int i = 0; i < n; i++) { - if (!null1[0] && vector1[i] == 1) { - outputColVector.noNulls = false; - isNull[i] = true; - } else { - outputColVector.setElement(i, i, arg2ColVector); + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (!null1[i] && vector1[i] == 1) { // test this row's own NULL flag; a NULL condition falls through to arg2 + outputColVector.noNulls = false; + isNull[i] = true; + } else { + outputColVector.setElement(i, i, arg2ColVector); + } + } + } else { + for (int i = 0; i < n; i++) { + if (!null1[i] && vector1[i] == 1) { // test this row's own NULL flag; a NULL condition falls through to arg2 + outputColVector.noNulls = false; + isNull[i] = true; + } else { + outputColVector.setElement(i, i, arg2ColVector); + } } } } - - arg2ColVector.unFlatten(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java new file mode 100644 index
0000000..5a68cec --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class IfExprNullNull extends VectorExpression { + + private static final long serialVersionUID = 1L; + + public IfExprNullNull(int outputColumnNum) { + super(outputColumnNum); + } + + public IfExprNullNull() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + final ColumnVector outputColVector = batch.cols[outputColumnNum]; + + // We do not need to do a column reset since we are carefully changing the output. 
+ + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; + } + + @Override + public String vectorExpressionParameters() { + return "null, null"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index 09aa9ab..6214357 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -67,8 +67,10 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -86,7 +88,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -94,6 +96,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java index 9167178..3121200 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java @@ -69,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -88,14 +90,20 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { - outputColVector.fill(arg3Scalar); + outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length); + outputColVector.isRepeating = true; } return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java index 84d0052..bd93b1a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java @@ -70,8 +70,10 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -89,14 +91,20 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length); + outputColVector.isRepeating = true; } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } return; } + // Carefully update noNulls... 
+ if (outputColVector.noNulls) { + outputColVector.noNulls = arg3ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the input arg3ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java index 5ed457b..92fc148 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -67,8 +67,10 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - outputColVector.noNulls = true; // output must be a scalar and neither one is null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -80,11 +82,12 @@ public void evaluate(VectorizedRowBatch batch) { outputColVector.initBuffer(); if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length); } else { - outputColVector.fill(arg3Scalar); + outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length); } + outputColVector.isRepeating = true; return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index ee3cd19..b115134 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -64,8 +64,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,7 +83,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. 
*/ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -89,6 +91,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // Carefully update noNulls... + if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + } + // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java index b98ddbe..c03d809 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java @@ -70,8 +70,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,14 +83,20 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { - outputColVector.fill(arg3Scalar); + outputColVector.set(0, arg3Scalar); + outputColVector.isRepeating = true; } return; } + // Carefully update noNulls... + if (outputColVector.noNulls) { + outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 + } + // Extend any repeating values and noNulls indicator in the inputs to // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java index abd585d..5d5e248 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java @@ -69,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -80,14 +82,20 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.set(0, arg2Scalar); + outputColVector.isRepeating = true; } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } return; } + // Carefully update noNulls... + if (outputColVector.noNulls) { + outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector + } + // Extend any repeating values and noNulls indicator in the inputs to // reduce the number of code paths needed below. // This could be optimized in the future by having separate paths diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java index 24299e9..e456572 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java @@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -79,12 +81,16 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.set(0, arg2Scalar); } else { - outputColVector.fill(arg3Scalar); + outputColVector.set(0, arg3Scalar); } - } else if (arg1ColVector.noNulls) { + outputColVector.isRepeating = true; + return; + } + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java index 6b141d1..c7245aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java @@ -61,8 +61,10 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // output never has nulls for this operator - batch.cols[outputColumnNum].noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.noNulls) { outputVector[0] = 1; batch.cols[outputColumnNum].isRepeating = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java index 7347800..9516205 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java @@ -60,8 +60,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // output never has nulls for this operator - batch.cols[outputColumnNum].noNulls = true; + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputColVector.noNulls) { outputVector[0] = 0; batch.cols[outputColumnNum].isRepeating = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java index 62860df..1078c4f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java @@ -53,7 +53,10 @@ public void evaluate(VectorizedRowBatch batch) { ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum]; ColumnVector childV = listV.child; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (listV.isRepeating) { if (listV.isNull[0]) { outV.isNull[0] = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java index 242fddc..33a44b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -45,6 +47,12 @@ public LongColEqualLongScalar() { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -52,55 +60,84 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector 
outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = vector[0] == value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] == value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] == value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] == value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" - outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] == value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] == value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] == value ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java index 633015e..bce3e4f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
+ + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { outputVector[0] = vector[0] >= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] >= value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] >= value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" - outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] >= value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] >= value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... 
+ + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] >= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 25c07df..250328e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,78 @@ public void 
evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { outputVector[0] = vector[0] > value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] > value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] > value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" - outputVector[i] = (value - vector[i]) >>> 63; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] > value ? 
1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] > value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] > value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 1e5b349..fec4544 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -53,8 +53,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + 
boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +64,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { outputVector[0] = vector[0] <= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] <= value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] <= value ? 1 : 0; + } + } else { + java.util.Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" - outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] <= value ? 
1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] <= value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] <= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index 2f282a9..7e86386 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void 
evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { outputVector[0] = vector[0] < value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] < value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] < value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" - outputVector[i] = (vector[i] - value) >>> 63; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] < value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] < value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] < value ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 0e78f8d..b903888 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
+ + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { outputVector[0] = vector[0] != value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] != value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] != value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" - outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] != value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] != value ? 
1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] != value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 8d915c2..bb1ea32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + 
boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = vector[0] == value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value == vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value == vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value == vector[i] ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" - outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value == vector[i] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value == vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value == vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index a06fb08..18431a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value >= vector[0] ? 
1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value >= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value >= vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value >= vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" - outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value >= vector[i] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value >= vector[0] ? 
1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value >= vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index 6610288..5b6ffce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] 
outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value > vector[0] ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value > vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value > vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value > vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" - outputVector[i] = (vector[i] - value) >>> 63; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value > vector[i] ? 
1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value > vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value > vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index 7a305d3..c5dfa7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void 
evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value <= vector[0] ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value <= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value <= vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value <= vector[i] ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" - outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value <= vector[i] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value <= vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value <= vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index 763dfdf..9baced8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = value < vector[0] ? 
1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value < vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value < vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value < vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" - outputVector[i] = (value - vector[i]) >>> 63; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value < vector[i] ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value < vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... 
+ + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value < vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java index aecaed2..7160403 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,78 @@ public void 
evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputVector[0] = vector[0] != value ? 1 : 0; + outputIsNull[0] = false; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value != vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value != vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value != vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" - outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value != vector[i] ? 
1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value != vector[0] ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + + /* + * Our current output column does not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value != vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java index c52e337..56f837a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java @@ -65,19 +65,26 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + boolean[] isNull = outV.isNull; + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; + isNull[0] = false; func(outV, vector, 0); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + isNull[i] = false; func(outV, vector, i); } outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + isNull[i] = false; func(outV, vector, i); } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index 30f20f3..b489434 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -60,8 +60,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; // 0 XOR 1 yields 1, 1 XOR 1 yields 0 diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java index bfd7334..6450a0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java @@ -60,8 +60,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; resultLen[0] = length[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index f730c9d..2bac745 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -72,8 +72,11 @@ public void evaluate(VectorizedRowBatch batch) { // initialize output vector buffer to receive data outV.initBuffer(); + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index cbdcc76..e70883c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -321,8 +321,9 @@ public void evaluate(VectorizedRowBatch batch) { } } else { // there are no nulls in either input vector - // propagate null information - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ // perform data operation if (inV1.isRepeating && inV2.isRepeating) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 9b9c063..c0761e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -66,8 +66,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index 94fbef8..53cfe16 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -72,8 +72,11 @@ public void evaluate(VectorizedRowBatch batch) { // initialize output vector buffer to receive data outV.initBuffer(); + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 5934f6f..c3b0094 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -139,6 +139,10 @@ public void evaluate(VectorizedRowBatch batch) { int[] start = inV.start; outV.initBuffer(); + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inV.isRepeating) { outV.isRepeating = true; if (!inV.noNulls && inV.isNull[0]) { @@ -147,7 +151,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); return; } else { - outV.noNulls = true; int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx); if (offset != -1) { outV.setVal(0, vector[0], offset, len[0] - (offset - start[0])); @@ -177,7 +180,6 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { - outV.noNulls = true; for (int i = 0; i != n; ++i) { int selected = sel[i]; int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], @@ -205,7 +207,6 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { - outV.noNulls = true; for (int i = 0; i != n; ++i) { int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); if (offset != -1) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 9d6eccf..34fd64c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -168,7 +168,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); return; } else { - outV.noNulls = true; populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray); if (offsetArray[0] != -1) { outV.setVal(0, vector[0], offsetArray[0], offsetArray[1]); @@ -197,7 +196,6 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { - outV.noNulls = true; for (int i = 0; i != n; ++i) { int selected = sel[i]; outV.isNull[selected] = false; @@ -225,7 +223,6 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { - outV.noNulls = true; for (int i = 0; i != n; ++i) { outV.isNull[i] = false; populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 544b700..5c378fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -86,8 +86,11 @@ public void evaluate(VectorizedRowBatch batch) { // It's implemented in the simplest way now, just calling the // existing built-in function. + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; s.set(vector[0], start[0], length[0]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 2f8b627..ad1d186 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -67,8 +67,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; func(outV, vector, start, length, 0); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java index 5eb2090..3e21f48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -64,8 +64,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; func(outV, inputColVector, 0); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index e232555..7d5ecba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -92,10 +92,13 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + switch (primitiveCategory) { case DATE: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; @@ -131,7 +134,6 @@ public void evaluate(VectorizedRowBatch batch) { case TIMESTAMP: if (inputCol.noNulls) { - outV.noNulls = true; if (batch.selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; @@ -169,7 +171,6 @@ public void evaluate(VectorizedRowBatch batch) { case CHAR: case VARCHAR: if (inputCol.noNulls) { - outV.noNulls = true; if (batch.selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 0aaba26..c8ea3ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -133,9 +133,12 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; + /* + * Do careful maintenance of the 
outputColVector.noNulls flag. + */ + long baseDateDays = DateWritable.millisToDays(baseDate.getTime()); if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 97e3669..667f455 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -95,6 +95,10 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory(); switch (primitiveCategory1) { case DATE: @@ -135,7 +139,6 @@ public void evaluate(VectorizedRowBatch batch) { switch (primitiveCategory0) { case DATE: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; @@ -171,7 +174,6 @@ public void evaluate(VectorizedRowBatch batch) { case TIMESTAMP: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; @@ -209,7 +211,6 @@ public void evaluate(VectorizedRowBatch batch) { case CHAR: case VARCHAR: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index c575c05..01657fa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -93,6 +93,10 @@ public void evaluate(VectorizedRowBatch batch) { /* true for all algebraic UDFs with no state */ outV.isRepeating = inputCol.isRepeating; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory(); switch (primitiveCategory0) { @@ -135,7 +139,6 @@ public void evaluate(VectorizedRowBatch batch) { switch (primitiveCategory1) { case DATE: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; @@ -171,7 +174,6 @@ public void evaluate(VectorizedRowBatch batch) { case TIMESTAMP: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; @@ -209,7 +211,6 @@ public void evaluate(VectorizedRowBatch batch) { case CHAR: case VARCHAR: if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java index 9d72bdf..44fa1e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java @@ -55,7 +55,10 @@ public void evaluate(VectorizedRowBatch batch) { // indexColumnVector includes the keys of Map indexColumnVector = batch.cols[indexColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -73,7 +76,6 @@ public void evaluate(VectorizedRowBatch batch) { // the key is found in MapColumnVector, set the value outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); outV.isNull[0] = false; - outV.noNulls = true; } outV.isRepeating = true; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java index e6a86ae..9df891c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java @@ -50,7 +50,10 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector outV = batch.cols[outputColumnNum]; MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -65,7 +68,6 @@ public void evaluate(VectorizedRowBatch batch) { } else { // the key is found in MapColumnVector, set the value outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); - outV.noNulls = true; } } outV.isRepeating = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 519a4e4..c9a21b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -100,8 +100,11 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector longColVector = (LongColumnVector) inputColVec; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index c5762d1..b6e4504 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -109,8 +109,11 @@ public void evaluate(VectorizedRowBatch batch) { // true for all algebraic UDFs with no state outV.isRepeating = inputCol.isRepeating; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 54cb5d8..fddd79b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -100,8 +100,11 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java index ed11a09..4192233 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java @@ -86,6 +86,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -93,7 +97,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0].set(firstValue); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java index 9ceeb13..6019fdc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -49,9 +49,12 @@ public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpressi public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { evaluateInputExpr(batch); + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; - longColVector.noNulls = true; longColVector.isNull[0] = false; longColVector.vector[0] = denseRank; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java index 9f65de4..ac7fad7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java @@ -82,6 +82,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -89,7 +93,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0] = firstValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java index 5151ecb..1517a48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java @@ -82,6 +82,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -89,7 +93,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0] = firstValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java index f7080e5..34add61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java @@ -50,9 +50,12 @@ public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression in public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { evaluateInputExpr(batch); + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; - longColVector.noNulls = true; longColVector.isNull[0] = false; longColVector.vector[0] = rank; groupCount += batch.size; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index f23a8b3..607d34a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -178,6 +178,11 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } private void fillGroupResults(VectorizedRowBatch batch) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + for (VectorPTFEvaluatorBase evaluator : evaluators) { final int outputColumnNum = evaluator.getOutputColumnNum(); if (evaluator.streamsResult()) { @@ -190,7 +195,6 @@ private void fillGroupResults(VectorizedRowBatch batch) { if (isGroupResultNull) { outputColVector.noNulls = false; } else { - outputColVector.noNulls = true; switch (evaluator.getResultColumnVectorType()) { case LONG: ((LongColumnVector) outputColVector).vector[0] = evaluator.getLongGroupResult(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index a1a1282..82b7a15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -140,7 +140,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - batch.cols[outputColumnNum].noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ /* If all input columns are repeating, just evaluate function * for row 0 in the batch and set output repeating. diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 190771e..81b8826 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1701,7 +1701,7 @@ private boolean validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInf private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOperator tableScanOperator, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - LOG.info("Validating and vectorizing MapWork..."); + LOG.info("Validating and vectorizing MapWork... 
(vectorizedVertexNum " + vectorizedVertexNum + ")"); // Set "global" member indicating where to store "not vectorized" information if necessary. currentBaseWork = mapWork; @@ -1905,7 +1905,7 @@ private boolean validateAndVectorizeReduceOperators(ReduceWork reduceWork, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - LOG.info("Validating and vectorizing ReduceWork..."); + LOG.info("Validating and vectorizing ReduceWork... (vectorizedVertexNum " + vectorizedVertexNum + ")"); Operator newVectorReducer; try { @@ -4101,9 +4101,6 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { for (int i = 0; i < size; i++) { ExprNodeDesc expr = colList.get(i); VectorExpression ve = vContext.getVectorExpression(expr); - if (ve.getOutputColumnNum() == -1) { - fake++; - } projectedOutputColumns[i] = ve.getOutputColumnNum(); if (ve instanceof IdentityExpression) { // Suppress useless evaluation. diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index b743e64..9530165 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + /** * This class supports string and binary data by value reference -- i.e. each field is @@ -309,8 +311,11 @@ public void increaseBufferSpace(int nextElemLength) { /** Copy the current object contents into the output. Only copy selected entries, * as indicated by selectedInUse and the sel array. 
*/ + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + BytesColumnVector output = (BytesColumnVector) outputColVector; // Output has nulls if and only if input has nulls. output.noNulls = noNulls; @@ -390,6 +395,9 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { // Fill the all the vector entries with provided value public void fill(byte[] value) { + if (!noNulls) { + Arrays.fill(isNull, false); + } noNulls = true; isRepeating = true; setRef(0, value, 0, value.length); diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index bce0bd7..cfff662 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -181,6 +181,11 @@ protected void flattenPush() { public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector); + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public abstract void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector); + /** * Initialize the column vector. This method can be overridden by specific column vector types. 
* Use this method only if the individual type of the column vector is not known, otherwise its diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index e41e19f..dcf2ccb 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -51,6 +53,9 @@ public DecimalColumnVector(int size, int precision, int scale) { // Fill the all the vector entries with provided value public void fill(HiveDecimal value) { + if (!noNulls) { + Arrays.fill(isNull, false); + } noNulls = true; isRepeating = true; if (vector[0] == null) { @@ -149,4 +154,66 @@ public void shallowCopyTo(ColumnVector otherCv) { other.precision = precision; other.vector = vector; } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + @Override + public void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DecimalColumnVector output = (DecimalColumnVector) outputColVector; + + // Output has nulls if and only if input has nulls. 
+ output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + if (isNull[0]) { + output.vector[0].setFromLong(0); + } else { + output.set(0, vector[0]); + } + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.set(i, vector[i]); + } + } else { + for (int i = 0; i < size; i++) { + output.set(i, vector[i]); + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + if (!isNull[i]) { + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index e04af01..e81921d 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -54,8 +54,11 @@ public DoubleColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DoubleColumnVector output = (DoubleColumnVector) outputColVector; // Output has nulls if and only if input has nulls. 
output.noNulls = noNulls; @@ -98,6 +101,9 @@ public void copySelected( // Fill the column vector with the provided value public void fill(double value) { + if (!noNulls) { + Arrays.fill(isNull, false); + } noNulls = true; isRepeating = true; vector[0] = value; diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java index f813b1b..49c01e4 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java @@ -237,6 +237,7 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { public void set(int elementNum, HiveIntervalDayTime intervalDayTime) { this.totalSeconds[elementNum] = intervalDayTime.getTotalSeconds(); this.nanos[elementNum] = intervalDayTime.getNanos(); + isNull[elementNum] = false; } /** @@ -246,6 +247,7 @@ public void set(int elementNum, HiveIntervalDayTime intervalDayTime) { public void setFromScratchIntervalDayTime(int elementNum) { this.totalSeconds[elementNum] = scratchIntervalDayTime.getTotalSeconds(); this.nanos[elementNum] = scratchIntervalDayTime.getNanos(); + isNull[elementNum] = false; } /** @@ -260,8 +262,11 @@ public void setNullValue(int elementNum) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + IntervalDayTimeColumnVector output = (IntervalDayTimeColumnVector) outputColVector; // Output has nulls if and only if input has nulls. 
output.noNulls = noNulls; @@ -310,6 +315,9 @@ public void copySelected( * @param intervalDayTime */ public void fill(HiveIntervalDayTime intervalDayTime) { + if (!noNulls) { + Arrays.fill(isNull, false); + } noNulls = true; isRepeating = true; totalSeconds[0] = intervalDayTime.getTotalSeconds(); diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java index 7ecb1e0..b36baac 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java @@ -116,4 +116,10 @@ public void unFlatten() { } } + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } + } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index 49e9184..f065b0e 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -54,44 +54,88 @@ public LongColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; - output.isRepeating = false; + LongColumnVector output = (LongColumnVector) outputColVector; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.vector[0] = vector[0]; + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } else { + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } } - } - else { - System.arraycopy(vector, 0, output.vector, 0, size); - } + } else { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + for (int i = 0; i < size; i++) { + output.vector[i] = vector[i]; + } } } } @@ -101,49 +145,91 @@ public void copySelected( public void copySelected( boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; // automatic conversion to double is done here - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.vector[0] = vector[0]; // automatic conversion to double is done here + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; - } - } - else { - for(int i = 0; i < size; ++i) { - output.vector[i] = vector[i]; + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } else { + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } } - } + } else { + + // Carefully handle NULLs... + + /* + * Our current output column may not have NULL flags set. Turn off + * the flag even though for the selectedInUse case it may not apply anymore. + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + for (int i = 0; i < size; i++) { + output.vector[i] = vector[i]; + } } } } // Fill the column vector with the provided value public void fill(long value) { + if (!noNulls) { + Arrays.fill(isNull, false); + } noNulls = true; isRepeating = true; vector[0] = value; diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java index 078c9c1..cbb8ee2 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java @@ -128,4 +128,10 @@ public void unFlatten() { values.unFlatten(); } } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git 
storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java index b65c802..54989eb 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java @@ -134,4 +134,10 @@ public void setRepeating(boolean isRepeating) { public void shallowCopyTo(ColumnVector otherCv) { throw new UnsupportedOperationException(); // Implement if needed. } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java index 0e7f86f..4b87796 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -311,8 +311,11 @@ public void setNullValue(int elementNum) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + TimestampColumnVector output = (TimestampColumnVector) outputColVector; // Output has nulls if and only if input has nulls. 
output.noNulls = noNulls; @@ -361,6 +364,9 @@ public void copySelected( * @param timestamp */ public void fill(Timestamp timestamp) { + if (!noNulls) { + Arrays.fill(isNull, false); + } noNulls = true; isRepeating = true; time[0] = timestamp.getTime(); diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java index 448461b..9ace7f3 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java @@ -142,4 +142,10 @@ public void setRepeating(boolean isRepeating) { public void shallowCopyTo(ColumnVector otherCv) { throw new UnsupportedOperationException(); // Implement if needed. } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new UnsupportedOperationException("Not supported"); // Implement if needed. + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 42c7e8f..ea13c24 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -136,6 +136,50 @@ private static String toUTF8(Object o) { return o.toString(); } + public String stringifyColumn(int columnNum) { + if (size == 0) { + return ""; + } + StringBuilder b = new StringBuilder(); + b.append("columnNum "); + b.append(columnNum); + b.append(", size "); + b.append(size); + b.append(", selectedInUse "); + b.append(selectedInUse); + ColumnVector colVector = cols[columnNum]; + b.append(", noNulls "); + b.append(colVector.noNulls); + b.append(", isRepeating "); + b.append(colVector.isRepeating); + b.append('\n'); + + final boolean noNulls = colVector.noNulls; + final boolean[] isNull = 
colVector.isNull; + if (colVector.isRepeating) { + final boolean hasRepeatedValue = (noNulls || !isNull[0]); + for (int i = 0; i < size; i++) { + if (hasRepeatedValue) { + colVector.stringifyValue(b, 0); + } else { + b.append("NULL"); + } + b.append('\n'); + } + } else { + for (int i = 0; i < size; i++) { + final int batchIndex = (selectedInUse ? selected[i] : i); + if (noNulls || !isNull[batchIndex]) { + colVector.stringifyValue(b, batchIndex); + } else { + b.append("NULL"); + } + b.append('\n'); + } + } + return b.toString(); + } + @Override public String toString() { if (size == 0) {