diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt index b5011c3..06f6c44 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,16 +66,11 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt index ae0d348..0d20c78 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticColumnDecimal.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; 
DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt index cbec1ab..1e7677b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,45 +70,82 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] value; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector.isRepeating) { - outputVector[0] = vector[0] value; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value; + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector[i] value; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] value; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = vector[i] value; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt index d5aef78..59614f1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnArithmeticScalarDecimal.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,68 +72,94 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, vector[0], value, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - + if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(0, vector[0], value, outputColVector); - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt index fd31672..a2d7a8a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,30 +66,25 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - - // Handle nulls first + + // Handle nulls first NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); - + /* Disregard nulls for processing. In other words, - * the arithmetic operation is performed even if one or + * the arithmetic operation is performed even if one or * more inputs are null. 
This is to improve speed by avoiding * conditional checks in the inner loop. - */ + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputVector[0] = vector1[0] vector2[0] ? 1 : 0; } else if (inputColVector1.isRepeating) { diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt index 51e6994..372f943 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnCompareScalar.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +69,92 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector[i] value ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] value ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] value ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector[i] value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector[i] value ? 1 : 0; - } + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt index 3e95557..a1bdf65 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,16 +66,11 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt index 2be16cc..c15049a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumnDecimal.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,16 +67,12 @@ public class extends VectorExpression { DecimalColumnVector inputColVector1 = 
(DecimalColumnVector) batch.cols[colNum1]; DecimalColumnVector inputColVector2 = (DecimalColumnVector) batch.cols[colNum2]; DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outputColVector.isNull; int[] sel = batch.selected; - int n = batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] @@ -103,10 +105,12 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. DecimalUtil.Checked(i, vector1[0], vector2[i], outputColVector); } } else { for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. 
DecimalUtil.Checked(i, vector1[0], vector2[i], outputColVector); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt index 159a61e..b9b038e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; @@ -53,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,50 +70,88 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (value == 0) { // Denominator is zero, convert the batch to nulls outputColVector.noNulls = false; outputColVector.isRepeating = true; outputIsNull[0] = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; } else if (inputColVector.isRepeating) { - outputVector[0] = vector[0] value; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] value; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] value; + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] value; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector[i] value; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] value; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = vector[i] value; + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { + for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = vector[i] value; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = vector[i] value; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt index 2631468..1d7603e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideScalarDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,28 +72,12 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; +; HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n 
== 0) { - return; - } - - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (value.compareTo(HiveDecimal.ZERO) == 0) { @@ -93,45 +85,90 @@ public class extends VectorExpression { outputColVector.noNulls = false; outputColVector.isRepeating = true; outputIsNull[0] = true; + return; } else if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, vector[0], value, outputColVector); + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, vector[0], value, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - // copy isNull entries first because operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, vector[i], value, outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, vector[i], value, outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt index 811f6db..01d1795 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -44,6 +47,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -53,49 +62,83 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = ( vector[0]); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = ( vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = ( vector[i]); + + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = ( vector[i]); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = ( vector[i]); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = ( vector[i]); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = ( vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = ( vector[i]); + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { + for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = ( vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = ( vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ( vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index f0ab471..2423457 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -47,6 +50,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -56,49 +65,82 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = - vector[0]; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = - vector[0]; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = -vector[i]; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = -vector[i]; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = -vector[i]; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = -vector[i]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = -vector[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = -vector[i]; + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - outputVector[i] = -vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = -vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = -vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; + } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt index 027e6ed..4b7d992 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DTIColumnCompareScalar.txt @@ -21,6 +21,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import java.sql.Date; +import org.apache.hadoop.hive.serde2.io.DateWritable; /** * Generated from template DTIColumnCompareScalar.txt, which covers comparison @@ -40,6 +42,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt index ebc0d8a..d038b2b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DTIScalarCompareColumn.txt @@ -18,9 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.io.LongWritable; +import java.sql.Date; +import org.apache.hadoop.hive.serde2.io.DateWritable; 
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -45,6 +44,11 @@ public class extends { } @Override + public String vectorExpressionParameters() { + + } + + @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) .setMode( diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt index 5db9a0b..5b180b3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,16 +81,11 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt index bf4b24c..bb246c0 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt +++ 
ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticIntervalYearMonthScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,59 +82,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.( + scratchDate1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.( - scratchDate1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchDate1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchDate1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt index 847ebac..7957b63 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { 
super.evaluateChildren(batch); } @@ -74,13 +81,8 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + long[] vector1 = inputColVector1.vector; outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating diff --git ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt index 180bebc..d1c2feb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,58 +81,107 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); - dtm.( - scratchTimestamp1, value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); + dtm.( + scratchTimestamp1, value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt index 4f12315..680f2f8 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticIntervalYearMonthColumn.txt @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Date; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class extends 
VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,59 +96,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... 
+ if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt index a6fa2ac..79473cc 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DateScalarArithmeticTimestampColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -75,6 +76,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + 
return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,52 +95,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt index 30b03ba..c3f8022 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Column.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,17 +68,12 @@ public class extends VectorExpression { Decimal64ColumnVector inputColVector2 = (Decimal64ColumnVector) batch.cols[colNum2]; Decimal64ColumnVector outputColVector = (Decimal64ColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = 
inputColVector2.vector; long[] outputVector = outputColVector.vector; boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt index 81dcf33..866cd51 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ColumnArithmeticDecimal64Scalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,95 +72,133 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* - * Initialize output vector NULL values to false. This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[0] value; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[0] value; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... 
+ if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } } } else { - for(int i = 0; i != n; i++) { - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } + } + } else { + for(int i = 0; i != n; i++) { + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputColVector.noNulls = false; + outputIsNull[i] = true; + } } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. 
- final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = vector[i] value; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = vector[i] value; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. 
} @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt index dc6ccb9..c768e89 100644 --- ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt +++ ql/src/gen/vectorization/ExpressionTemplates/Decimal64ScalarArithmeticDecimal64Column.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -56,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,95 +73,131 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - final long outputDecimal64AbsMax = HiveDecimalWritable.getDecimal64AbsMax(outputColVector.precision); - if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[0]; - outputVector[0] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[0]; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } else { + outputVector[0] = result; + } + } else { outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } else { - for(int i = 0; i != n; i++) { - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; - outputIsNull[i] = true; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for(int i = 0; i != n; i++) { + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. 
+ final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - final long result = value vector[i]; - outputVector[i] = result; - if (Math.abs(result) > outputDecimal64AbsMax) { - outputColVector.noNulls = false; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + final long result = value vector[i]; + outputVector[i] = result; + if (Math.abs(result) > outputDecimal64AbsMax) { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } + } else { outputIsNull[i] = true; + outputColVector.noNulls = false; } } } } - - // Currently, we defer division, etc to regular HiveDecimal so we don't do any null - // default value setting here. } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt index 1ab5228..a7622ea 100644 --- ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt +++ ql/src/gen/vectorization/ExpressionTemplates/DecimalColumnUnaryFunc.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -27,7 +30,6 @@ import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import java.util.Arrays; public class extends VectorExpression { private static final long serialVersionUID = 1L; @@ -49,6 +51,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { this.evaluateChildren(batch); } @@ -58,54 +66,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - if (inputColVector.isRepeating) { + HiveDecimalWritable[] vector = inputColVector.vector; - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outputIsNull[0] = inputIsNull[0]; - (0, vector[0], outputColVector); + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + (0, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; + return; + } - // Set isNull because decimal operation can yield a null. - outputIsNull[i] = false; - (i, vector[i], outputColVector); + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } } } else { - - // Set isNull because decimal operation can yield a null. - Arrays.fill(outputIsNull, 0, n, false); - for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. 
+ (i, vector[i], outputColVector); + } } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - (i, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + (i, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } - outputColVector.isRepeating = false; } } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt index 36ad892..55eb50e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -57,28 +57,27 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if 
(inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. + if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -105,12 +104,9 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((vector[0] < leftValue || vector[0] > rightValue)) { - + // Entire batch is filtered out. batch.size = 0; } @@ -121,7 +117,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } @@ -132,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((leftValue <= vector[i] && vector[i] <= rightValue)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt index 150d341..e458992 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + [] vector1 = inputColVector1.vector; [] 
vector2 = inputColVector2.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // filter rows with NULL on left input int newSize; newSize = NullUtil.filterNulls(batch.cols[colNum1], batch.selectedInUse, sel, n); @@ -85,12 +86,9 @@ public class extends VectorExpression { n = batch.size = newSize; batch.selectedInUse = true; } - + // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(vector1[0] vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt index a9ddeca..c955c06 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterColumnCompareScalar.txt @@ -51,25 +51,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(vector[0] value)) { //Entire batch is filtered out. 
batch.size = 0; @@ -97,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(vector[0] value)) { //Entire batch is filtered out. batch.size = 0; @@ -111,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } @@ -122,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (vector[i] value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt index 7c41f3e..f42668c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnBetween.txt @@ -60,26 +60,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. 
if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -108,10 +106,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((DecimalUtil.compare(vector[0], leftValue) < 0 || DecimalUtil.compare(vector[0], rightValue) > 0)) { // Entire batch is filtered out. @@ -124,19 +119,19 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } } } - + // Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((DecimalUtil.compare(leftValue, vector[i]) <= 0 && DecimalUtil.compare(vector[i], rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt index 6a82183..77fe7ae 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = 
batch.size; + HiveDecimalWritable[] vector1 = inputColVector1.vector; HiveDecimalWritable[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt index 80a19d9..078b132 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalColumnCompareDecimalScalar.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(vector[0], value) 0)) { // Entire batch is filtered out. @@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(vector[i], value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt index 4b7e849..20dbaba 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterDecimalScalarCompareDecimalColumn.txt @@ -53,24 +53,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } DecimalColumnVector inputColVector = (DecimalColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - HiveDecimalWritable[] vector = inputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + HiveDecimalWritable[] vector = inputColVector.vector; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. 
@@ -99,9 +99,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(DecimalUtil.compare(value, vector[0]) 0)) { // Entire batch is filtered out. @@ -114,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } @@ -126,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (DecimalUtil.compare(value, vector[i]) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt index f741409..4afed54 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleColumnCompareTimestampColumn.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,13 +71,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector1 = inputColVector1.vector; // filter rows with NULL on left input int newSize; @@ -90,9 +91,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered 
out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(vector1[0] inputColVector2.(0))) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt index 8ece14f..8f8104d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterLongDoubleScalarCompareTimestampColumn.txt @@ -57,24 +57,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value inputColVector.(0))) { //Entire batch is filtered out. batch.size = 0; @@ -102,9 +100,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value inputColVector.(0))) { //Entire batch is filtered out. 
batch.size = 0; @@ -116,7 +112,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } @@ -127,7 +123,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value inputColVector.(i)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt index 18840f1..28b5704 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterScalarCompareColumn.txt @@ -52,25 +52,24 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } inputColVector = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + [] vector = inputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(value vector[0])) { //Entire batch is filtered out. batch.size = 0; @@ -98,9 +97,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(value vector[0])) { //Entire batch is filtered out. 
batch.size = 0; @@ -112,7 +109,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } @@ -123,7 +120,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (value vector[i]) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index b9a332a..b7f70e1 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -56,27 +56,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -108,9 +107,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. 
Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -124,20 +121,20 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt index 0f0cb2e..5b7acad 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,27 +67,22 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; + byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = 
inputColVector1.start; int[] start2 = inputColVector2.start; int[] length1 = inputColVector1.length; int[] length2 = inputColVector2.length; - - // return immediately if batch is empty - if (n == 0) { - return; - } - + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - + /* Either all must remain selected or all will be eliminated. * Repeating property will not change. */ - if (!((vector1[0], start1[0], length1[0], + if (!((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { batch.size = 0; } @@ -90,7 +91,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -99,7 +100,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -114,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -123,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -137,7 +138,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -146,7 +147,7 @@ public class extends 
VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -173,7 +174,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -184,7 +185,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -206,7 +207,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -215,7 +216,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -231,7 +232,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -242,7 +243,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -259,7 +260,7 @@ public class extends VectorExpression { } else if (inputColVector2.noNulls) { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { if 
(nullPos1[0] || - !((vector1[0], start1[0], length1[0], + !((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0]))) { batch.size = 0; return; @@ -275,7 +276,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -284,7 +285,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -300,7 +301,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -311,7 +312,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -328,7 +329,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -339,7 +340,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -356,7 +357,7 @@ public class extends VectorExpression { } else { if (inputColVector1.isRepeating && inputColVector2.isRepeating) { if (nullPos1[0] || nullPos2[0] || - !((vector1[0], start1[0], length1[0], + !((vector1[0], 
start1[0], length1[0], vector2[0], start2[0], length2[0]))) { batch.size = 0; } @@ -370,7 +371,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -381,7 +382,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -402,7 +403,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -413,7 +414,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { sel[newSize++] = i; } @@ -430,7 +431,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos1[i] && !nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } @@ -441,7 +442,7 @@ public class extends VectorExpression { int newSize = 0; for(int i = 0; i != n; i++) { if (!nullPos1[i] && !nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt index a85a889..76ec8a0 100644 --- 
ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt @@ -52,27 +52,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. @@ -101,9 +100,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((vector[0], start[0], length[0], value, 0, value.length))) { //Entire batch is filtered out. 
@@ -116,19 +113,19 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((vector[i], start[i], length[i], value, 0, value.length)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt index f3d1e58..91d8da5c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt @@ -53,27 +53,26 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. 
@@ -102,9 +101,7 @@ public abstract class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!((value, 0, value.length, vector[0], start[0], length[0]))) { //Entire batch is filtered out. @@ -117,19 +114,19 @@ public abstract class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((value, 0, value.length, vector[i], start[i], length[i])) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt index 53bf271..604060a 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt @@ -59,25 +59,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. 
if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. @@ -106,10 +103,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. - // Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((inputColVector.compareTo(0, leftValue) < 0 || inputColVector.compareTo(0, rightValue) > 0)) { // Entire batch is filtered out. @@ -122,7 +116,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } @@ -134,7 +128,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((inputColVector.compareTo(leftValue, i) <= 0 && inputColVector.compareTo(i, rightValue) <= 0)) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt index eaa58c7..f9bc9ee 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,13 +68,8 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - [] 
vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } + [] vector2 = inputColVector2.vector; // filter rows with NULL on left input int newSize; @@ -87,9 +88,6 @@ public class extends VectorExpression { // All rows with nulls have been filtered out, so just do normal filter for non-null case if (n != 0 && inputColVector1.isRepeating && inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. if (!(inputColVector1.(0) vector2[0])) { batch.size = 0; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt index 2e38269..fc1be95 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleScalar.txt @@ -53,24 +53,22 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector.isNull; if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector.(0) value)) { //Entire batch is filtered out. 
batch.size = 0; @@ -98,9 +96,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector.(0) value)) { //Entire batch is filtered out. batch.size = 0; @@ -112,7 +108,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } @@ -123,7 +119,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector.(i) value) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt index 697e3ef..0a541f9 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampColumn.txt @@ -57,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,12 +76,6 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos1 = inputColVector1.isNull; boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt 
ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt index 435316d..68e0006 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareTimestampScalar.txt @@ -56,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,18 +70,10 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector1.isNull; if (inputColVector1.noNulls) { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. batch.size = 0; @@ -103,9 +101,7 @@ public class extends VectorExpression { } } else { if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector1.compareTo(0, value) 0)) { //Entire batch is filtered out. 
batch.size = 0; @@ -117,7 +113,7 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } @@ -128,7 +124,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector1.compareTo(i, value) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt index 4887ad2..d5952de 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampScalarCompareTimestampColumn.txt @@ -56,6 +56,13 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -63,18 +70,10 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } + boolean[] inputIsNull = inputColVector2.isNull; if (inputColVector2.noNulls) { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. @@ -103,9 +102,7 @@ public class extends VectorExpression { } } else { if (inputColVector2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
- if (!nullPos[0]) { + if (!inputIsNull[0]) { if (!(inputColVector2.compareTo(value, 0) 0)) { // Entire batch is filtered out. @@ -118,7 +115,7 @@ public class extends VectorExpression { int newSize = 0; for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } @@ -130,7 +127,7 @@ public class extends VectorExpression { } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if (inputColVector2.compareTo(value, i) 0) { sel[newSize++] = i; } diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt index cc86a3e..44e8e18 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -58,27 +58,26 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - if (inputColVector.noNulls) { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. 
if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -110,9 +109,7 @@ public class extends VectorExpression { } } else { if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { + if (!inputIsNull[0]) { if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { @@ -126,20 +123,20 @@ public class extends VectorExpression { int newSize = 0; for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; } } } - + //Change the selected vector batch.size = newSize; } else { int newSize = 0; for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { sel[newSize++] = i; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 52f1d9e..5bab540 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,18 +74,14 @@ public class extends VectorExpression { outputColVector = () 
batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector2 = arg2ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -93,22 +95,26 @@ public class extends VectorExpression { // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); + outputIsNull[i] = (vector1[i] == 1 ? 
arg2ColVector.isNull[i] : false); } } } else /* there are nulls */ { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt index 1693e8f..df41b5f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -68,18 +74,14 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] vector3 = arg3ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { outputColVector.fill(arg2Scalar); @@ -95,18 +97,25 @@ public class extends VectorExpression { // for when arg3ColVector is repeating or has no nulls. arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. + */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? 
false : arg3ColVector.isNull[i]); } } else { for(int i = 0; i != n; i++) { outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt index ebdfe47..cd532f3 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,48 +74,101 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = arg1ColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + return; + } + + if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } } } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2Scalar : arg3Scalar); - outputIsNull[i] = false; + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ + /* + * Since we always generate a result without NULLs, we can optimize this case similar to + * the optimization above... + */ + + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2Scalar : arg3Scalar); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2Scalar : arg3Scalar); + } } - Arrays.fill(outputIsNull, 0, n, false); } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt index 9767973..940c360 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateColumn.txt @@ -61,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -75,22 +81,17 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; - // Handle nulls first + // Handle nulls first NullUtil.propagateNullsColCol( inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt index ca5829c..93eb5eb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt +++ 
ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticDateScalar.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -61,6 +63,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,59 +82,109 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + long[] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + outputIsNull[i] = false; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt index d6e45ac..bdae745 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,16 +79,10 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) 
batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] || inputColVector2.isRepeating && !inputColVector2.noNulls && inputColVector2.isNull[0]; diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt index 6e232e7..d005b8b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -60,6 +61,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,59 +80,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; long[] vector1 = inputColVector1.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth1.set((int) vector1[0]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - scratchIntervalYearMonth1.set((int) vector1[0]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth1.set((int) vector1[i]); - dtm.( - scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth1.set((int) vector1[i]); + dtm.( + scratchIntervalYearMonth1, value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt index 041a651..ec9ea01 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticDateColumn.txt @@ -18,7 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Date; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -75,6 +77,12 @@ public class 
extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -88,59 +96,109 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + long[] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchDate2, outputDate); + outputVector[0] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchDate2, outputDate); - outputVector[0] = DateWritable.dateToDays(outputDate); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } else { - for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } + } else { + for(int i = 0; i != n; i++) { + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls 
flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchDate2, outputDate); - outputVector[i] = DateWritable.dateToDays(outputDate); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchDate2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchDate2, outputDate); + outputVector[i] = DateWritable.dateToDays(outputDate); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt index f2d4eaf..81b1406 100644 --- ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/IntervalYearMonthScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -72,6 +73,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + 
return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -85,52 +92,101 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratchTimestamp(0), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = inputIsNull[i]; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratchTimestamp(i), outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + outputIsNull[i] = inputIsNull[i]; + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt index bd2cbac..2d56625 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampColumn.txt @@ -54,6 +54,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,15 +68,10 @@ public class extends VectorExpression { TimestampColumnVector inputColVector2 = (TimestampColumnVector) batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) 
batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; + [] vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt index 889c445..10c3601 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleColumnCompareTimestampScalar.txt @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -54,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,62 +69,92 @@ public class extends VectorExpression { inputColVector1 = () batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] 
vector1 = inputColVector1.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector1[0] value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector1[0] value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector1[i] value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector1[i] value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = vector1[i] value ? 1 : 0; - } - } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector1[0] value ? 
1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector1[i] value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = vector1[i] value ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + * NOTE: We can't avoid conditional statements for LONG/DOUBLE because of NULL + * comparison requirements. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector1[i] value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = vector1[i] value ? 1 : 0; + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt index 4d79283..f692788 100644 --- ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/LongDoubleScalarCompareTimestampColumn.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,88 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; - long[] outputVector = outputColVector.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; - // return immediately if batch is empty - if (n == 0) { - return; - } + long[] outputVector = outputColVector.vector; + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value inputColVector.(0) ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value inputColVector.(i) ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value inputColVector.(i) ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value inputColVector.(0) ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value inputColVector.(i) ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = value inputColVector.(i) ? 
1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value inputColVector.(i) ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt index e95baa6..cc8b8a7 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumn.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,45 +82,82 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } if (inputColVector.isRepeating) { - outputVector[0] = value vector[0]; - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value vector[0]; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i]; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i]; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value vector[i]; + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value vector[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i]; + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = value vector[i]; outputIsNull[i] = inputIsNull[i]; + outputVector[i] = value vector[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = value vector[i]; } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt index 3ffca6c..05ebc60 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarArithmeticColumnDecimal.txt @@ -15,9 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,60 +72,93 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullDataEntriesDecimal(outputColVector, batch.selectedInUse, sel, n); return; } - + if (inputColVector.noNulls) { - - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - if (inputColVector.isRepeating) { - if (!inputColVector.noNulls) { - outputIsNull[0] = inputIsNull[0]; - } - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(0, value, vector[0], outputColVector); - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputIsNull[i] = inputIsNull[i]; - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - - // The following may override a "false" null setting if an error or overflow occurs. - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt index 9f4ec50..b2c1909 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarCompareColumn.txt @@ -15,10 +15,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -53,6 +56,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,63 +69,90 @@ public class extends VectorExpression { inputColVector = () batch.cols[colNum]; outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = value vector[0] ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value vector[0] ? 
1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i] ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = value vector[i] ? 1 : 0; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value vector[0] ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = value vector[i] ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = value vector[i] ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = value vector[i] ? 1 : 0; - } + outputIsNull[i] = false; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt index aa33354..a2020a6 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -65,6 +67,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,56 +82,88 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - boolean hasDivBy0 = false; if (inputColVector.isRepeating) { - denom = vector[0]; - outputVector[0] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + denom = vector[0]; + outputVector[0] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } else { - for(int i = 0; i != n; i++) { - denom = vector[i]; - outputVector[i] = value denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } + } else { + for(int i = 0; i != n; i++) { + denom = vector[i]; + outputVector[i] = value denom; + hasDivBy0 = hasDivBy0 || (denom == 0); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { denom = vector[i]; outputVector[i] = value denom; hasDivBy0 = hasDivBy0 || (denom == 0); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt index 650101c..8158136 100644 --- ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt +++ ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumnDecimal.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -64,67 +72,96 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; - outputColVector.isRepeating = inputColVector.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + HiveDecimalWritable[] vector = inputColVector.vector; HiveDecimalWritable[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(0, value, vector[0], outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } if (inputColVector.noNulls) { - /* Initialize output vector NULL values to false. 
This is necessary - * since the decimal operation may produce a NULL result even for - * a non-null input vector value, and convert the output vector - * to have noNulls = false; - */ - NullUtil.initOutputNullsToFalse(outputColVector, inputColVector.isRepeating, - batch.selectedInUse, sel, n); - } - - if (inputColVector.isRepeating) { - DecimalUtil.Checked(0, value, vector[0], outputColVector); - - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } else { - for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } + } else { + for(int i = 0; i != n; i++) { + // The following may override a "false" null setting if an error or overflow occurs. 
+ DecimalUtil.Checked(i, value, vector[i], outputColVector); + } } } } else /* there are nulls */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - - // copy isNull entry first because the operation may overwrite it - outputIsNull[i] = inputIsNull[i]; - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { - - // copy isNull entries first because the operation may overwrite them - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - DecimalUtil.Checked(i, value, vector[i], outputColVector); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + // The following may override a "false" null setting if an error or overflow occurs. + DecimalUtil.Checked(i, value, vector[i], outputColVector); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } } - - /* - * Null data entries are not set to a special non-zero value because all null math operations - * are checked, meaning that a zero-divide always results in a null result anyway. - */ } @Override diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt index 1b1db54..9b3cc3b 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; @@ -53,6 +53,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,7 +71,6 @@ public class extends VectorExpression { boolean[] nullPos2 = inputColVector2.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; byte[][] vector1 = inputColVector1.vector; byte[][] vector2 = inputColVector2.vector; int[] start1 = inputColVector1.start; @@ -74,17 +79,16 @@ public class extends VectorExpression { int[] length2 = inputColVector2.length; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.noNulls = true; + + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; + // handle case where neither input has nulls if (inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = true; + + // TEMPORARILY: + outputColVector.reset(); + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; ret = (vector1[0], start1[0], length1[0], @@ -98,7 +102,7 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -107,7 +111,7 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -119,7 +123,7 @@ public class extends VectorExpression { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -128,7 +132,7 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -139,7 +143,7 @@ public class extends VectorExpression { } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -148,7 +152,7 @@ public class extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -156,15 +160,18 @@ public 
class extends VectorExpression { } } } - + // handle case where only input 2 has nulls } else if (inputColVector1.noNulls) { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos2[0]; if (!nullPos2[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -179,7 +186,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -191,7 +198,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -211,7 +218,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; outNull[i] = false; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -221,7 +228,7 @@ public class extends VectorExpression { } else { for(int i = 0; i != n; i++) { outNull[i] = false; - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -235,7 +242,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -247,7 +254,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - 
if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -255,17 +262,20 @@ public class extends VectorExpression { } } } - } + } } - + // handle case where only input 1 has nulls } else if (inputColVector2.noNulls) { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos1[0]; if (!nullPos1[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -283,7 +293,7 @@ public class extends VectorExpression { for(int j = 0; j != n; j++) { int i = sel[j]; outNull[i] = false; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -293,7 +303,7 @@ public class extends VectorExpression { } else { for(int i = 0; i != n; i++) { outNull[i] = false; - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -307,7 +317,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -319,7 +329,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -334,7 +344,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], 
start2[i], length2[i])) { outVector[i] = 1; } else { @@ -346,7 +356,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -354,17 +364,20 @@ public class extends VectorExpression { } } } - } + } } - + // handle case where both inputs have nulls } else { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos1[0] || nullPos2[0]; if (!outNull[0]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[0], start2[0], length2[0])) { outVector[0] = 1; } else { @@ -382,7 +395,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -394,7 +407,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos2[i]; if (!nullPos2[i]) { - if ((vector1[0], start1[0], length1[0], + if ((vector1[0], start1[0], length1[0], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -414,7 +427,7 @@ public class extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -426,7 +439,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i]; if (!nullPos1[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[0], start2[0], length2[0])) { outVector[i] = 1; } else { @@ -441,7 +454,7 @@ public class 
extends VectorExpression { int i = sel[j]; outNull[i] = nullPos1[i] || nullPos2[i]; if (!outNull[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { @@ -453,7 +466,7 @@ public class extends VectorExpression { for(int i = 0; i != n; i++) { outNull[i] = nullPos1[i] || nullPos2[i]; if (!outNull[i]) { - if ((vector1[i], start1[i], length1[i], + if ((vector1[i], start1[i], length1[i], vector2[i], start2[i], length2[i])) { outVector[i] = 1; } else { diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt index ca55834..e2cb01c 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -53,31 +53,43 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] nullPos = inputColVector.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = false; + // TEMPORARILY: + outputColVector.reset(); + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; + outputColVector.isRepeating = true; if ((vector[0], start[0], length[0], value, 0, value.length)) { outVector[0] = 1; } else { @@ -102,7 +114,10 @@ public abstract class extends VectorExpression { } } } else { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos[0]; diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt index ecb4d2a..ede6fd7 100644 --- ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -54,6 +54,13 @@ public abstract class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -62,22 +69,24 @@ public abstract class extends VectorExpression { int[] sel = batch.selected; boolean[] nullPos = inputColVector.isNull; boolean[] outNull = outputColVector.isNull; - int n = batch.size; + byte[][] vector = inputColVector.vector; int[] length = inputColVector.length; int[] start = inputColVector.start; - long[] outVector = outputColVector.vector; + long[] outVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; + if (inputColVector.noNulls) { - outputColVector.noNulls = true; + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; + outputColVector.isRepeating = true; if ((value, 0, value.length, vector[0], start[0], length[0])) { outVector[0] = 1; } else { @@ -102,7 +111,10 @@ public abstract class extends VectorExpression { } } } else { + + // Carefully handle NULLs... outputColVector.noNulls = false; + if (inputColVector.isRepeating) { outputColVector.isRepeating = true; outNull[0] = nullPos[0]; diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt index a27da10..dcc2c22 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateColumn.txt @@ -60,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,15 +80,9 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt index 9f708e2..bd780cd 100644 --- 
ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticDateScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -61,6 +62,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -74,52 +81,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt index b3d9a4b..526a52d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthColumn.txt @@ -59,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -73,15 +79,9 @@ public class extends VectorExpression { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - 
outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt index e49f614..cee2355 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticIntervalYearMonthScalar.txt @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -70,52 +78,98 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratchTimestamp(0), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratchTimestamp(i), value, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt index 95e7271..e66d7aa 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampColumn.txt @@ -58,6 +58,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,12 +78,6 @@ public class extends VectorExpression { outputColVector = () batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; - - // return immediately if batch is empty - if (n == 0) { - return; - } outputColVector.isRepeating = 
inputColVector1.isRepeating && inputColVector2.isRepeating diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt index 6baa72a..eff8c46 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnArithmeticTimestampScalar.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -59,6 +60,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -72,52 +79,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector1.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector1.noNulls; - outputColVector.isRepeating = inputColVector1.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + inputColVector1.asScratch(0), value, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector1.isRepeating) { - dtm.( - inputColVector1.asScratch(0), value, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector1.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - inputColVector1.asScratch(i), value, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + inputColVector1.asScratch(i), value, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt index 54a1a37..1627f61 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleColumn.txt @@ -52,6 +52,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -60,15 +66,9 @@ public class extends VectorExpression { inputColVector2 = () batch.cols[colNum2]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; [] vector2 = inputColVector2.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - 
return; - } - outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt index 3bb95dd..012b240 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareLongDoubleScalar.txt @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -54,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -61,61 +70,88 @@ public class extends VectorExpression { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.(0) value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector.(i) value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.(i) value ? 1 : 0; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector.(i) value ? 1 : 0; + } } } } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- if (!nullPos[0]) { - outputVector[0] = inputColVector.(0) value ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector.(i) value ? 1 : 0; + } else { + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt index 3db5d01..ebfabce 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampColumn.txt @@ -55,6 +55,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,14 +73,8 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - int n = batch.size; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { - return; - } - 
outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating || inputColVector1.isRepeating && !inputColVector1.noNulls && inputColVector1.isNull[0] diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt index 1ee7b11..56d422f 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampColumnCompareTimestampScalar.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -55,6 +57,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -65,61 +73,88 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector1.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector1.isNull; + boolean[] outputIsNull = outputColVector.isNull; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector1.isRepeating) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector1.noNulls; if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; - } - } - } else { - if (inputColVector1.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector1.compareTo(0, value) 0 ? 
1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector1.compareTo(i, value) 0 ? 1 : 0; + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt index 509f264..8e87b8e 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticDateColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -74,6 +75,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -87,59 +94,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); - dtm.( - value, scratchTimestamp2, outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); + dtm.( + value, scratchTimestamp2, outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt index 2de3044..d01da9d 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticIntervalYearMonthColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; @@ -73,6 +74,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = 
batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -86,59 +93,108 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; long[] vector2 = inputColVector2.vector; - // return immediately if batch is empty - if (n == 0) { + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchIntervalYearMonth2.set((int) vector2[0]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - scratchIntervalYearMonth2.set((int) vector2[0]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } else { - for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } + } else { + for(int i = 0; i != n; i++) { + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - scratchIntervalYearMonth2.set((int) vector2[i]); - dtm.( - value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); - outputColVector.setFromScratchTimestamp(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + scratchIntervalYearMonth2.set((int) vector2[i]); + dtm.( + value, scratchIntervalYearMonth2, outputColVector.getScratchTimestamp()); + outputColVector.setFromScratchTimestamp(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt index 4ed80d1..24fcf14 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarArithmeticTimestampColumn.txt @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -71,6 +72,12 @@ public class extends VectorExpression { */ public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) 
{ + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -84,53 +91,99 @@ public class extends VectorExpression { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector2.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector2.noNulls; - outputColVector.isRepeating = inputColVector2.isRepeating; - int n = batch.size; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + dtm.( + value, inputColVector2.asScratch(0), outputColVector.getScratch()); + outputColVector.setFromScratch(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - if (inputColVector2.isRepeating) { - dtm.( - value, inputColVector2.asScratch(0), outputColVector.getScratch()); - outputColVector.setFromScratch(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (inputColVector2.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } else { - for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } + } else { + for(int i = 0; i != n; i++) { + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - dtm.( - value, inputColVector2.asScratch(i), outputColVector.getScratch()); - outputColVector.setFromScratch(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + dtm.( + value, inputColVector2.asScratch(i), outputColVector.getScratch()); + outputColVector.setFromScratch(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt index 6cca0bb..1987ceb 100644 --- ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt +++ ql/src/gen/vectorization/ExpressionTemplates/TimestampScalarCompareTimestampColumn.txt @@ -18,10 +18,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; +import java.util.Arrays; import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -57,6 +59,12 @@ public class extends VectorExpression { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if 
batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -67,61 +75,90 @@ public class extends VectorExpression { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector2.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector2.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector2.isRepeating) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector2.noNulls; if (inputColVector2.noNulls) { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } } else { - for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; - } - } - } else { - if (inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inputColVector2.compareTo(value, 0) 0 ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; - outNulls[i] = false; } else { - //comparison with null is null - outNulls[i] = true; + // Comparison with NULL is NULL. + outputIsNull[i] = true; + outputColVector.noNulls = false; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { + outputIsNull[i] = false; outputVector[i] = inputColVector2.compareTo(value, i) 0 ? 1 : 0; + } else { + // Comparison with NULL is NULL. 
+ outputIsNull[i] = true; + outputColVector.noNulls = false; } } } diff --git ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt index 4ab3e76..3c8f8822 100644 --- ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt +++ ql/src/gen/vectorization/TestTemplates/TestColumnColumnOperationVectorExpressionEvaluation.txt @@ -48,9 +48,13 @@ || inputColumnVector1.isRepeating && inputColumnVector2.isRepeating, outputColumnVector.isRepeating); + /* + We no longer set noNulls to the input ColumnVector's value since that doesn't work + for scratch column reuse. assertEquals( "Output column vector no nulls state does not match operand columns", inputColumnVector1.noNulls && inputColumnVector2.noNulls, outputColumnVector.noNulls); + */ //if repeating, only the first value matters if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { diff --git ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt index e5f3f18..991135c 100644 --- ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt +++ ql/src/gen/vectorization/TestTemplates/TestColumnScalarOperationVectorExpressionEvaluation.txt @@ -45,9 +45,13 @@ "Output column vector is repeating state does not match operand column", inputColumnVector.isRepeating, outputColumnVector.isRepeating); + /* + We no longer set noNulls to the input ColumnVector's value since that doesn't work + for scratch column reuse. 
assertEquals( "Output column vector no nulls state does not match operand column", inputColumnVector.noNulls, outputColumnVector.noNulls); + */ if(!outputColumnVector.noNulls && !outputColumnVector.isRepeating) { for(int i = 0; i < BATCH_SIZE; i++) { diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt index 733731f..8bce425 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt @@ -320,7 +320,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt index 6e42598..40f8cd2 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt @@ -359,7 +359,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum.setFromLong(0L); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt index d5325c3..e4e5baa 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal64ToDecimal.txt @@ -371,7 +371,7 @@ public class extends VectorAggregateExpression { long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { final long value = 
vector[0]; for (int i = 0; i < batchSize; i++) { myagg.avgValue(value); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt index 8ab393c..0a71a21 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt @@ -368,7 +368,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.mergeSum.setFromLong(0L); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt index be2fadd..a3c07a0 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt @@ -334,7 +334,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.mergeCount = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt index 6190a9e..576f7ec 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt @@ -317,7 +317,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt index fd54256..2df45bb 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMax.txt @@ -314,7 +314,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { myagg.minmaxValue(vector[0]); } return; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt index 4764a45..9fe85d3 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt @@ -318,7 +318,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls && + if ((inputVector.noNulls || !inputVector.isNull[0]) && (myagg.isNull || (myagg.value.compareTo(vector[0]) 0))) { myagg.isNull = false; myagg.value.set(vector[0]); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt index 4680161..9a0a6e7 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt @@ -307,7 +307,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls && + if ((inputColVector.noNulls || !inputColVector.isNull[0]) && (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { myagg.isNull = false; inputColVector.intervalDayTimeUpdate(myagg.value, 0); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index 027688d..4f0b5a5 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -278,7 +278,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { myagg.checkValue(inputColumn.vector[0], inputColumn.start[0], inputColumn.length[0]); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt index 370b6a8..579437e 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -309,7 +309,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls && + if ((inputColVector.noNulls || !inputColVector.isNull[0]) && (myagg.isNull || (inputColVector.compareTo(myagg.value, 0) 0))) { myagg.isNull = false; inputColVector.timestampUpdate(myagg.value, 0); diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt index 3e3d070..c731869 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt index cb9c962..876ead5 100644 --- 
ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { [] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { iterateRepeatingNoNulls(myagg, vector[0], batchSize); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt index 3d03c09..cf19b14 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt @@ -311,7 +311,7 @@ public class extends VectorAggregateExpression { HiveDecimalWritable[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { iterateRepeatingNoNulls(myagg, vector[0], inputVector.scale, batchSize); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt index 570d771..9b1c1cd 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt @@ -371,7 +371,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputStructColVector.isRepeating) { - if (inputStructColVector.noNulls) { + if (inputStructColVector.noNulls || !inputStructColVector.isNull[0]) { final long count = countVector[0]; final double sum = sumVector[0]; final double variance = varianceVector[0]; diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt index d6cd505..1dd5ab4 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt @@ -290,7 
+290,7 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation)agg; if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { iterateRepeatingNoNulls(myagg, inputColVector.getDouble(0), batchSize); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 8264e8a..dfa79e6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2936,6 +2936,20 @@ private VectorExpression getWhenExpression(List childExpr, childExpr.subList(2, childExpr.size())); } + if (isNullConst(thenDesc) && isNullConst(elseDesc)) { + + // THEN NULL ELSE NULL: An unusual "case", but possible. + final int outputColumnNum = ocm.allocateOutputColumn(returnType); + + final VectorExpression resultExpr = + new IfExprNullNull( + outputColumnNum); + + resultExpr.setOutputTypeInfo(returnType); + resultExpr.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + return resultExpr; + } if (isNullConst(thenDesc)) { final VectorExpression whenExpr = getVectorExpression(whenDesc, mode); final VectorExpression elseExpr = getVectorExpression(elseDesc, mode); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 66de847..44b7c95 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -97,19 +97,6 @@ public static void setNullColIsNullValue(ColumnVector cv, int rowIndex) { } /** - * Iterates thru all the column vectors and sets noNull to - * specified value. 
- * - * @param batch - * Batch on which noNull is set - */ - public static void setNoNullFields(VectorizedRowBatch batch) { - for (int i = 0; i < batch.numCols; i++) { - batch.cols[i].noNulls = true; - } - } - - /** * Iterates thru all the column vectors and sets repeating to * specified column. * diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 0e703a5..ff55f50 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -423,7 +423,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Boolean) value == true ? 1 : 0); - lcv.isNull[0] = false; } } break; @@ -436,7 +435,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Byte) value); - lcv.isNull[0] = false; } } break; @@ -449,7 +447,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Short) value); - lcv.isNull[0] = false; } } break; @@ -462,7 +459,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Integer) value); - lcv.isNull[0] = false; } } break; @@ -475,7 +471,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill((Long) value); - lcv.isNull[0] = false; } } break; @@ -488,7 +483,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill(DateWritable.dateToDays((Date) value)); - lcv.isNull[0] = false; } } break; @@ -501,7 +495,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else 
{ lcv.fill((Timestamp) value); - lcv.isNull[0] = false; } } break; @@ -514,7 +507,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue lcv.isRepeating = true; } else { lcv.fill(((HiveIntervalYearMonth) value).getTotalMonths()); - lcv.isNull[0] = false; } } @@ -526,7 +518,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue icv.isRepeating = true; } else { icv.fill(((HiveIntervalDayTime) value)); - icv.isNull[0] = false; } } @@ -538,7 +529,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dcv.isRepeating = true; } else { dcv.fill((Float) value); - dcv.isNull[0] = false; } } break; @@ -551,7 +541,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dcv.isRepeating = true; } else { dcv.fill((Double) value); - dcv.isNull[0] = false; } } break; @@ -563,10 +552,7 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue dv.isNull[0] = true; dv.isRepeating = true; } else { - HiveDecimal hd = (HiveDecimal) value; - dv.set(0, hd); - dv.isRepeating = true; - dv.isNull[0] = false; + dv.fill((HiveDecimal) value); } } break; @@ -580,7 +566,6 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue bcv.isRepeating = true; } else { bcv.fill(bytes); - bcv.isNull[0] = false; } } break; @@ -595,8 +580,7 @@ public void addPartitionColsToBatch(ColumnVector[] cols, Object[] partitionValue bcv.isNull[0] = true; bcv.isRepeating = true; } else { - bcv.setVal(0, sVal.getBytes()); - bcv.isRepeating = true; + bcv.fill(sVal.getBytes()); } } break; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java index c15bdc1..9ca2dbe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,7 +60,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -67,39 +68,53 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setDays(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + setDays(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDays(outputColVector, vector, i); + // Set isNull before call in case it changes it mind. outputIsNull[i] = inputIsNull[i]; + setDays(outputColVector, vector, i); } } else { + // Set isNull before calls in case tney change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { setDays(outputColVector, vector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index a2e4a52..778aea4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -75,57 +77,75 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + + if (inV.isRepeating) { + outV.isRepeating = true; + if (inV.noNulls || !inV.isNull[0]) { + // Set isNull before call in case it changes it mind. + outV.isNull[0] = false; convert(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; convert(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { convert(outV, inV, i); } - outV.isRepeating = false; } - } else { - - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - convert(outV, inV, 0); - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; convert(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outV.isNull[i] = false; convert(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java index aa529ed..7ad0493 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToLong.java @@ -64,6 +64,7 @@ protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) { outV.noNulls = false; return; } + outV.isNull[i] = false; switch (integerPrimitiveCategory) { case BYTE: outV.vector[i] = decWritable.byteValue(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java index 08abf27..956092a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDoubleToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] vector = inputColVector.vector; @@ -66,36 +67,58 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. 
- setDouble(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + setDouble(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; setDouble(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setDouble(outputColVector, vector, i); } } outputColVector.isRepeating = false; } else /* there are nulls */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setDouble(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - setDouble(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + setDouble(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } outputColVector.isRepeating = false; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java index df25eac..759c55c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -65,39 +66,64 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setSeconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; + setSeconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; setSeconds(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setSeconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setSeconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setSeconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before call in case it changes it mind. 
+ outputColVector.noNulls = false; System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setSeconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + setSeconds(outputColVector, vector, i); + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java index 42c34c8..51f340e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastMillisecondsLongToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -54,7 +56,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] vector = inputColVector.vector; @@ -63,39 +64,69 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - setMilliseconds(outputColVector, vector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. 
+ outputIsNull[0] = false; + setMilliseconds(outputColVector, vector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; setMilliseconds(outputColVector, vector, i); } } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { setMilliseconds(outputColVector, vector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - setMilliseconds(outputColVector, vector, i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { + // Set isNull before calls in case they change their mind. + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - setMilliseconds(outputColVector, vector, i); + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; + setMilliseconds(outputColVector, vector, i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 34269da..27eeb68 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -28,6 +28,7 @@ import org.apache.hive.common.util.DateParser; import java.nio.charset.StandardCharsets; +import java.util.Arrays; /** * Casts a string vector to a date vector. @@ -64,57 +65,74 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - if (!inV.isNull[i]) { + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; evaluate(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!inV.isNull[i]) { + if (!inputIsNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outV.isNull[i] = false; evaluate(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 41443c5..bfc9953 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; @@ -81,46 +83,64 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; @@ -128,7 +148,12 @@ public void evaluate(VectorizedRowBatch batch) { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } outV.isRepeating = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java index 3ea1e8c..7667e12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -85,57 +87,77 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java index feb0ab6..1228cc7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalDayTime.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -60,57 +62,74 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; IntervalDayTimeColumnVector outV = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { evaluate(outV, inV, i); } } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { evaluate(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java index 09dd4d9..fdbb18b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToIntervalYearMonth.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -58,57 +60,82 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; evaluate(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; evaluate(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { evaluate(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - evaluate(outV, inV, 0); - } - } else if (batch.selectedInUse) { + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; evaluate(outV, inV, i); + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; evaluate(outV, inV, i); + } else { + outputIsNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java index a6cfee8..7eb07a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -183,57 +185,86 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. 
+ Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. - outV.noNulls = false; if (inV.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { + if (!inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outV.isNull[0] = false; func(outV, inV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; } } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind. + outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inV.isNull[i]) { + // Set isNull before call in case it changes its mind. 
+ outV.isNull[i] = false; func(outV, inV, i); + } else { + outV.isNull[i] = true; + outV.noNulls = false; } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java index 1231cda..7869999 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToBoolean.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -56,7 +58,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -65,39 +66,51 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = toBool(inputColVector, 0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = toBool(inputColVector, 0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); + outputIsNull[i] = false; + outputVector[i] = toBool(inputColVector, i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = toBool(inputColVector, i); outputIsNull[i] = inputIsNull[i]; + outputVector[i] = toBool(inputColVector, i); } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = toBool(inputColVector, i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java index e696455..4af823e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; import 
org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -51,7 +53,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; double[] outputVector = outputColVector.vector; @@ -60,39 +61,62 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getDouble(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getDouble(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inputColVector.getDouble(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getDouble(i); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getDouble(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getDouble(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getDouble(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java index 36b9f13..8eb7cb3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastTimestampToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -48,7 +50,6 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = inputColVector.noNulls; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -57,39 +58,63 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = inputColVector.getTimestampAsLong(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = inputColVector.getTimestampAsLong(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inputColVector.getTimestampAsLong(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inputColVector.getTimestampAsLong(i); } } outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = inputColVector.getTimestampAsLong(i); - outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } } else { for(int i = 0; i != n; i++) { - outputVector[i] = inputColVector.getTimestampAsLong(i); + if (!inputIsNull[i]) { + outputIsNull[i] = false; + outputVector[i] = inputColVector.getTimestampAsLong(i); + } else { + outputIsNull[i] = true; + outputColVector.noNulls = false; + } } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 127e431..6fb29a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -66,104 +68,121 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { + if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. 
outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] & vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] & vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. 
outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 1) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { + for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 1) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && 
inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -171,49 +190,46 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 1); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] 
= inputColVector1.isNull[i] && (vector2[i] == 1); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 1); } } - outV.isRepeating = false; } outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ @@ -223,7 +239,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] & vector2[0]; - outV.isNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -231,32 +247,31 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value & vector2[i]; - outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != 
n; i++) { outputVector[i] = vector1[i] & vector2Value; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } @@ -267,21 +282,19 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] & vector2[i]; - outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 3542a07..9208cd4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -69,104 +71,120 @@ public void evaluate(VectorizedRowBatch batch) { return; } + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + long vector1Value = vector1[0]; long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { + // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = vector1[0] | vector2[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1Value | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2Value; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = vector1[i] | vector2[i]; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; } } - outV.isRepeating = false; } - outV.noNulls = true; - } else if (inputColVector1.noNulls && !inputColVector2.noNulls) { + return; + } + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outV.noNulls = false; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { // only input 2 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; + outputIsNull[0] = (vector1[0] == 0) && inputColVector2.isNull[0]; } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { 
outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; + outputIsNull[i] = (vector1[i] == 0) && inputColVector2.isNull[i]; } } - outV.isRepeating = false; } - outV.noNulls = false; } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { // only input 1 side has nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -174,51 +192,47 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); + outputIsNull[0] = inputColVector1.isNull[0] && (vector2[0] == 0); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = 
sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); + outputIsNull[i] = inputColVector1.isNull[i] && (vector2[i] == 0); } } - outV.isRepeating = false; } - outV.noNulls = false; } else /* !inputColVector1.noNulls && !inputColVector2.noNulls */{ // either input 1 or input 2 may have nulls if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { @@ -226,7 +240,7 @@ public void evaluate(VectorizedRowBatch batch) { // Repeating property will not change. outV.isRepeating = true; outputVector[0] = vector1[0] | vector2[0]; - outV.isNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) + outputIsNull[0] = ((vector1[0] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[0] && (vector2[0] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[0]); } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { @@ -234,57 +248,53 @@ public void evaluate(VectorizedRowBatch batch) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1Value | vector2[i]; - outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } else if (!inputColVector1.isRepeating && inputColVector2.isRepeating) 
{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2Value; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } - outV.isRepeating = false; } else /* neither side is repeating */{ if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { outputVector[i] = vector1[i] | vector2[i]; - outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) + outputIsNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[i] && (vector2[i] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[i]); } } - outV.isRepeating = false; } - outV.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java index c7cab2a..5b89131 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java @@ -121,70 +121,84 @@ public 
ConstantVectorExpression(int outputColumnNum, TypeInfo outputTypeInfo, bo isNullValue = isNull; } + /* + * In the following evaluate* methods, since we are supporting scratch column reuse, we must + * assume the column may have noNulls of false and some isNull entries true. + * + * So, do a proper assignments. + */ + private void evaluateLong(VectorizedRowBatch vrg) { + LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { + cv.isNull[0] = false; cv.vector[0] = longValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDouble(VectorizedRowBatch vrg) { DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { + cv.isNull[0] = false; cv.vector[0] = doubleValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateBytes(VectorizedRowBatch vrg) { BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { + cv.isNull[0] = false; cv.setVal(0, bytesValue, 0, bytesValueLength); } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDecimal(VectorizedRowBatch vrg) { DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.vector[0].set(decimalValue); + dcv.isNull[0] = false; + dcv.set(0, decimalValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateTimestamp(VectorizedRowBatch vrg) { - TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; - dcv.isRepeating = true; - dcv.noNulls = !isNullValue; + TimestampColumnVector tcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; + tcv.isRepeating = true; if (!isNullValue) { - dcv.set(0, timestampValue); + tcv.isNull[0] = false; + 
tcv.set(0, timestampValue); } else { - dcv.isNull[0] = true; + tcv.isNull[0] = true; + tcv.noNulls = false; } } private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { + dcv.isNull[0] = false; dcv.set(0, intervalDayTimeValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java index 2699681..aa74834 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateColSubtractDateScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -83,43 +84,62 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector1.isRepeating) { - scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); - dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector1.noNulls) { + if (inputColVector1.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[0])); + dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector1.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp1.setTime(DateWritable.daysToMillis((int) vector1[i])); dtm.subtract(scratchTimestamp1, value, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java index 946b738..7b325d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DateScalarSubtractDateColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -86,43 +87,61 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector2.isRepeating) { - scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); - dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); - outputColVector.setFromScratchIntervalDayTime(0); - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; - } else if (inputColVector2.noNulls) { + if (inputColVector2.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[0])); + dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); + outputColVector.setFromScratchIntervalDayTime(0); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); + return; + } + + if (inputColVector2.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } } - } else { /* there are nulls */ + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = inputIsNull[i]; scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); - outputIsNull[i] = inputIsNull[i]; } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { scratchTimestamp2.setTime(DateWritable.daysToMillis((int) vector2[i])); dtm.subtract(value, scratchTimestamp2, outputColVector.getScratchIntervalDayTime()); outputColVector.setFromScratchIntervalDayTime(i); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java index 9a8177c..41d279f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalColumnInList.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.util.DateTimeMath; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import java.sql.Timestamp; @@ -80,8 +81,8 @@ public void evaluate(VectorizedRowBatch batch) { DecimalColumnVector inputColumnVector = (DecimalColumnVector) batch.cols[inputColumn]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColumnVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColumnVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; HiveDecimalWritable[] vector 
= inputColumnVector.vector; long[] outputVector = outputColVector.vector; @@ -91,49 +92,51 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColumnVector.noNulls; - if (inputColumnVector.noNulls) { - if (inputColumnVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColumnVector.isRepeating) { + if (inputColumnVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColumnVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } } else { - if (inputColumnVector.isRepeating) { - - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.contains(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(vector[i]) ? 
1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java index 791d8f2..7a0ee86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,40 +60,52 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; outV.initBuffer(); + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { //Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. 
+ outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } } else { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java index ba83b6a..33911fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleColumnInList.java @@ -65,8 +65,8 @@ public void evaluate(VectorizedRowBatch batch) { DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; double[] vector = inputColVector.vector; long[] outputVector 
= outputColVector.vector; @@ -76,49 +76,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 
1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java index c8b1dad..ec9ab01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DoubleToStringUnaryUDF.java @@ -58,19 +58,34 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector = inputColVector.vector; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); + boolean[] inputIsNull = inputColVector.isNull; if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, vector, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; func(outV, vector, i); @@ -82,18 +97,13 @@ public void evaluate(VectorizedRowBatch batch) { } outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { // Handle case with nulls. 
Don't do function if the value is null, // because the data may be undefined for a null value. outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -101,7 +111,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, i); } } - outV.isRepeating = false; } else { System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -109,7 +118,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java index 1c1bc0b..252a816 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DynamicValueVectorExpression.java @@ -77,73 +77,73 @@ public DynamicValueVectorExpression(int outputColumnNum, TypeInfo typeInfo, private void evaluateLong(VectorizedRowBatch vrg) { LongColumnVector cv = (LongColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { - cv.vector[0] = longValue; cv.isNull[0] = false; + cv.vector[0] = longValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDouble(VectorizedRowBatch vrg) { DoubleColumnVector cv = (DoubleColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; if (!isNullValue) { - cv.vector[0] = doubleValue; cv.isNull[0] = false; + cv.vector[0] = doubleValue; } else { cv.isNull[0] = true; + cv.noNulls = false; } } private 
void evaluateBytes(VectorizedRowBatch vrg) { BytesColumnVector cv = (BytesColumnVector) vrg.cols[outputColumnNum]; cv.isRepeating = true; - cv.noNulls = !isNullValue; cv.initBuffer(); if (!isNullValue) { - cv.setVal(0, bytesValue, 0, bytesValueLength); cv.isNull[0] = false; + cv.setVal(0, bytesValue, 0, bytesValueLength); } else { cv.isNull[0] = true; + cv.noNulls = false; } } private void evaluateDecimal(VectorizedRowBatch vrg) { DecimalColumnVector dcv = (DecimalColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.vector[0].set(decimalValue); dcv.isNull[0] = false; + dcv.set(0, decimalValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateTimestamp(VectorizedRowBatch vrg) { TimestampColumnVector dcv = (TimestampColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.set(0, timestampValue); dcv.isNull[0] = false; + dcv.set(0, timestampValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } private void evaluateIntervalDayTime(VectorizedRowBatch vrg) { IntervalDayTimeColumnVector dcv = (IntervalDayTimeColumnVector) vrg.cols[outputColumnNum]; dcv.isRepeating = true; - dcv.noNulls = !isNullValue; if (!isNullValue) { - dcv.set(0, intervalDayTimeValue); dcv.isNull[0] = false; + dcv.set(0, intervalDayTimeValue); } else { dcv.isNull[0] = true; + dcv.noNulls = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java index 28d800e..7c06697 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import 
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,41 +59,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DoubleColumnVector outV = (DoubleColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java index 5fb9778..5480949 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -70,41 +72,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -112,7 +126,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -120,7 +133,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java index f518f39..92602dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDecimalToTimestamp.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + 
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -58,41 +60,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; TimestampColumnVector outV = (TimestampColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -100,7 +114,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -108,7 +121,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java index e632ff9..bde61b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncDoubleToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,41 +59,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java index d500612..fd8d148 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import 
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,41 +59,53 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { // Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -99,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -107,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java index f93dbfc..4a35a54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -73,55 +74,66 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; outV.initBuffer(); + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; + if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; prepareResult(0, vector, outV); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; prepareResult(i, vector, outV); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { prepareResult(i, vector, outV); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - prepareResult(0, vector, outV); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { prepareResult(i, vector, outV); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { prepareResult(i, vector, outV); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java index 1a94408..bdbf4d8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java @@ -55,9 +55,12 @@ 
public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; double[] outputVector = outputColVector.vector; - outputColVector.noNulls = true; outputColVector.isRepeating = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // return immediately if batch is empty if (n == 0) { return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java index d289dff..96ce35d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRandNoSeed.java @@ -55,9 +55,12 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; double[] outputVector = outputColVector.vector; - outputColVector.noNulls = true; outputColVector.isRepeating = false; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // return immediately if batch is empty if (n == 0) { return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java index ff8593e..1d534fb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRoundWithNumDigitsDecimalToDecimal.java @@ -68,14 +68,23 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // All must be selected otherwise size would be zero - // Repeating property will not change. 
- outputIsNull[0] = inputIsNull[0]; - round(0, vector[0], decimalPlaces, outputColVector); + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; + round(0, vector[0], decimalPlaces, outputColVector); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -92,8 +101,11 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -106,7 +118,6 @@ public void evaluate(VectorizedRowBatch batch) { round(i, vector[i], decimalPlaces, outputColVector); } } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java index d474ff0..ead44eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -54,40 +56,52 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; LongColumnVector outV = 
(LongColumnVector) batch.cols[outputCol]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { //Nothing to do return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -103,7 +117,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java index 93cf1ec..cc97889 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToDecimal.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,6 +59,8 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; DecimalColumnVector outV = (DecimalColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { @@ -64,35 +68,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -100,7 +112,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -108,7 +119,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java index 9eb4312..2cd314b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncTimestampToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import 
org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; @@ -58,6 +60,8 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inV.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { @@ -65,35 +69,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inV.noNulls) { - outV.noNulls = true; - if (inV.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inV.isRepeating) { + if (inV.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, inV, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inV.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inV, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inV, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inV.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inV.isNull[0]; - if (!inV.isNull[0]) { - func(outV, inV, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inV.isNull[i]; @@ -101,7 +113,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } else { System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -109,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inV, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java index f9b3f76..e309fbe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java @@ -64,39 +64,77 @@ public void evaluate(VectorizedRowBatch batch) { return; } - arg2ColVector.flatten(batch.selectedInUse, sel, n); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (arg1ColVector.isRepeating) { - if (!null1[0] && vector1[0] == 1) { - outputColVector.setElement(0, 0, arg2ColVector); + if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { + outputColVector.isRepeating = true; outputColVector.noNulls = false; isNull[0] = true; } return; } - if (batch.selectedInUse) { - for (int j = 0; j < n; j++) { - int i = sel[j]; - if (!null1[0] && vector1[i] == 1) { - outputColVector.setElement(i, i, arg2ColVector); - } else { - outputColVector.noNulls = false; - isNull[i] = true; + + if (arg1ColVector.noNulls) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } } } - } else { - for (int i = 0; i < n; i++) { - if (!null1[0] && vector1[i] == 1) { - outputColVector.setElement(i, i, arg2ColVector); - } else { - outputColVector.noNulls = false; - isNull[i] = true; + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } + } + } else { + for (int i = 0; i < n; i++) { + if (!null1[i] && vector1[i] == 1) { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } else { + isNull[i] = true; + outputColVector.noNulls = false; + } } } } - - arg2ColVector.unFlatten(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java index e7d4e4d..2c8a0b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java @@ -65,9 +65,8 @@ public void evaluate(VectorizedRowBatch batch) { DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = 
outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later - int n = batch.size; + + int n = batch.size; long[] vector1 = arg1ColVector.vector; double[] vector2 = arg2ColVector.vector; double[] vector3 = arg3ColVector.vector; @@ -78,6 +77,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* All the code paths below propagate nulls even if neither arg2 nor arg3 * have nulls. This is to reduce the number of code paths and shorten the * code, at the expense of maybe doing unnecessary work if neither input @@ -85,7 +87,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -98,6 +100,15 @@ public void evaluate(VectorizedRowBatch batch) { arg3ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -112,7 +123,16 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); } } - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java index fa7b2da..8c1cd90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java @@ -65,8 +65,7 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -75,6 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + /* All the code paths below propagate nulls even if neither arg2 nor arg3 * have nulls. This is to reduce the number of code paths and shorten the * code, at the expense of maybe doing unnecessary work if neither input @@ -82,7 +84,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. 
*/ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -95,6 +97,15 @@ public void evaluate(VectorizedRowBatch batch) { arg3ColVector.flatten(batch.selectedInUse, sel, n); if (arg1ColVector.noNulls) { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; @@ -110,6 +121,15 @@ public void evaluate(VectorizedRowBatch batch) { } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java index 487fb97..905ffba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; @@ -67,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) 
batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -78,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { outputColVector.fill(arg3Scalar); @@ -94,14 +98,25 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchIntervalDayTime(i) : arg3Scalar); } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java index 7b18cf8..feab24a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; @@ -67,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -78,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -96,14 +100,25 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchIntervalDayTime(i)); } } } else /* there are nulls */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java index 0ba6722..7d7184a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeScalarScalar.java @@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) { IntervalDayTimeColumnVector outputColVector = (IntervalDayTimeColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -79,23 +81,37 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { + return; + } + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } } else /* there are nulls */ { + + // Carefully handle NULLs... 
+ + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java index 0c8a2f6..d8ec895 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java @@ -64,8 +64,10 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; long[] vector2 = arg2ColVector.vector; @@ -96,6 +98,9 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java index 85c37f9..4c876bf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java @@ -64,39 +64,71 @@ public void evaluate(VectorizedRowBatch batch) { return; } - arg2ColVector.flatten(batch.selectedInUse, sel, n); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; if (arg1ColVector.isRepeating) { - if (!null1[0] && vector1[0] == 1) { + if ((arg1ColVector.noNulls || !null1[0]) && vector1[0] == 1) { + outputColVector.isRepeating = true; outputColVector.noNulls = false; isNull[0] = true; } else { - outputColVector.setElement(0, 0, arg2ColVector); + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } return; } - if (batch.selectedInUse) { - for (int j = 0; j < n; j++) { - int i = sel[j]; - if (!null1[0] && vector1[i] == 1) { - outputColVector.noNulls = false; - isNull[i] = true; - } else { - outputColVector.setElement(i, i, arg2ColVector); + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + isNull[i] = true; + outputColVector.noNulls = false; + } else { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } + } + } else { + for (int i = 0; i < n; i++) { + if (vector1[i] == 1) { + isNull[i] = true; + outputColVector.noNulls = false; + } else { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } } } } else { - for (int i = 0; i < n; i++) { - if (!null1[0] && vector1[i] == 1) { - outputColVector.noNulls = false; - isNull[i] = true; - } else { - outputColVector.setElement(i, i, arg2ColVector); + if (batch.selectedInUse) { + for (int j = 0; j < n; j++) { + int i = sel[j]; + if (!null1[0] && vector1[i] == 1) { + isNull[i] = true; + outputColVector.noNulls = false; + } else { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } + } + } else { + for (int i = 0; i < n; i++) { + if (!null1[0] && vector1[i] == 1) { + isNull[i] = true; + outputColVector.noNulls = false; + } else { + isNull[i] = false; + outputColVector.setElement(i, i, arg2ColVector); + } } } } - - arg2ColVector.unFlatten(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java new file mode 100644 index 0000000..5a68cec --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullNull.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class IfExprNullNull extends VectorExpression { + + private static final long serialVersionUID = 1L; + + public IfExprNullNull(int outputColumnNum) { + super(outputColumnNum); + } + + public IfExprNullNull() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + final ColumnVector outputColVector = batch.cols[outputColumnNum]; + + // We do not need to do a column reset since we are carefully changing the output. 
+ + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; + outputColVector.isRepeating = true; + } + + @Override + public String vectorExpressionParameters() { + return "null, null"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + throw new UnsupportedOperationException("Undefined descriptor"); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java index 09aa9ab..deb4606 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -67,8 +67,15 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -98,6 +105,11 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java index 9167178..3cd3755 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java @@ -69,8 +69,15 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -99,6 +106,11 @@ public void evaluate(VectorizedRowBatch batch) { // extend any repeating values and noNulls indicator in the inputs arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java index 84d0052..51b03dc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java @@ -70,8 +70,15 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -100,6 +107,11 @@ public void evaluate(VectorizedRowBatch batch) { // extend any repeating values and noNulls indicator in the input arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java index 5ed457b..9c0e7be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -67,8 +68,11 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - outputColVector.noNulls = true; // output must be a scalar and neither one is null - outputColVector.isRepeating = false; // may override later + boolean[] outputIsNull = outputColVector.isNull; + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -80,11 +84,12 @@ public void evaluate(VectorizedRowBatch batch) { outputColVector.initBuffer(); if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { + outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length); } else { - outputColVector.fill(arg3Scalar); + outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length); } + outputColVector.isRepeating = true; return; } @@ -92,6 +97,7 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { @@ -99,6 +105,7 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); @@ -111,6 +118,7 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { @@ -118,6 +126,7 @@ public void evaluate(VectorizedRowBatch batch) { } } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java index ee3cd19..3c83566 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -64,8 +66,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,7 +85,7 @@ public void evaluate(VectorizedRowBatch batch) { * of code paths. */ if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -93,6 +97,11 @@ public void evaluate(VectorizedRowBatch batch) { arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java index b98ddbe..c0cb2c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnScalarBase.java @@ -19,13 +19,12 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.sql.Timestamp; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; /** * Compute IF(expr1, expr2, expr3) for 3 input column expressions. @@ -70,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -81,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { outputColVector.fill(arg3Scalar); @@ -93,13 +94,19 @@ public void evaluate(VectorizedRowBatch batch) { // reduce the number of code paths needed below. arg2ColVector.flatten(batch.selectedInUse, sel, n); + /* + * Since we always set a value, make sure all isNull entries are set to false. + */ + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2ColVector.asScratchTimestamp(i) : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? 
arg2ColVector.asScratchTimestamp(i) : arg3Scalar); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java index abd585d..823b87c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java @@ -69,8 +69,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -80,7 +82,7 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); @@ -98,6 +100,7 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3ColVector.asScratchTimestamp(i)); } } else { @@ -106,6 +109,12 @@ public void evaluate(VectorizedRowBatch batch) { } } } else /* there are nulls */ { + + /* + * Do careful maintenance of NULLs. 
+ */ + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java index 24299e9..1649237 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java @@ -68,8 +68,10 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector outputColVector = (TimestampColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = false; // output is a scalar which we know is non null - outputColVector.isRepeating = false; // may override later + + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + int n = batch.size; long[] vector1 = arg1ColVector.vector; @@ -79,18 +81,27 @@ public void evaluate(VectorizedRowBatch batch) { } if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { + if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) { outputColVector.fill(arg2Scalar); } else { outputColVector.fill(arg3Scalar); } - } else if (arg1ColVector.noNulls) { + return; + } + + /* + * Since we always set a value, make sure all isNull entries are set to false. + */ + + if (arg1ColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, vector1[i] == 1 ? arg2Scalar : arg3Scalar); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, vector1[i] == 1 ? 
arg2Scalar : arg3Scalar); } @@ -99,16 +110,16 @@ public void evaluate(VectorizedRowBatch batch) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar); - outputIsNull[i] = false; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputColVector.set(i, !arg1ColVector.isNull[i] && vector1[i] == 1 ? arg2Scalar : arg3Scalar); } - Arrays.fill(outputIsNull, 0, n, false); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java index 6b141d1..a5cddc6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -52,35 +54,45 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; - long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; if (n <= 0) { // Nothing to do return; } - // output never has nulls for this operator - batch.cols[outputColumnNum].noNulls = true; - if (inputColVector.noNulls) { + // We do not need to do a column reset since we are carefully changing the output. 
+ outputColVector.isRepeating = false; + + if (inputColVector.noNulls) { + outputColVector.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = 1; - batch.cols[outputColumnNum].isRepeating = true; } else if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Selection property will not change. - outputVector[0] = nullPos[0] ? 0 : 1; - batch.cols[outputColumnNum].isRepeating = true; + outputColVector.isRepeating = true; + outputIsNull[0] = false; + outputVector[0] = inputIsNull[0] ? 0 : 1; } else { - batch.cols[outputColumnNum].isRepeating = false; + + /* + * Since we have a result for all rows, we don't need to do conditional NULL maintenance or + * turn off noNulls.. + */ + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = nullPos[i] ? 0 : 1; + outputIsNull[i] = false; + outputVector[i] = inputIsNull[i] ? 0 : 1; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { - outputVector[i] = nullPos[i] ? 0 : 1; + outputVector[i] = inputIsNull[i] ? 
0 : 1; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java index 7347800..17d567f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -52,34 +54,47 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector inputColVector = batch.cols[colNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; - long[] outputVector = ((LongColumnVector) batch.cols[outputColumnNum]).vector; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; + long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; + if (n <= 0) { // Nothing to do, this is EOF return; } - // output never has nulls for this operator - batch.cols[outputColumnNum].noNulls = true; + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + if (inputColVector.noNulls) { + outputColVector.isRepeating = true; + outputIsNull[0] = false; outputVector[0] = 0; - batch.cols[outputColumnNum].isRepeating = true; } else if (inputColVector.isRepeating) { - outputVector[0] = nullPos[0] ? 1 : 0; - batch.cols[outputColumnNum].isRepeating = true; + outputColVector.isRepeating = true; + outputIsNull[0] = false; + outputVector[0] = inputIsNull[0] ? 1 : 0; } else { + + /* + * Since we have a result for all rows, we don't need to do conditional NULL maintenance or + * turn off noNulls.. 
+ */ + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = nullPos[i] ? 1 : 0; + outputIsNull[i] = false; + outputVector[i] = inputIsNull[i] ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for (int i = 0; i != n; i++) { - outputVector[i] = nullPos[i] ? 1 : 0; + outputVector[i] = inputIsNull[i] ? 1 : 0; } } - batch.cols[outputColumnNum].isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java index dfe3bd1..c18a922 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColColumn.java @@ -56,7 +56,9 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector indexColumnVector = (LongColumnVector) batch.cols[indexColumnNum]; long[] indexV = indexColumnVector.vector; - outV.noNulls = true; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + if (listV.isRepeating) { if (listV.isNull[0]) { outV.isNull[0] = true; @@ -68,8 +70,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[0] = true; outV.noNulls = false; } else { - outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV); outV.isNull[0] = false; + outV.setElement(0, (int) (listV.offsets[0] + indexV[0]), childV); } outV.isRepeating = true; } else { @@ -83,7 +85,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = false; } } - outV.isRepeating = false; } } } else { @@ -97,7 +98,6 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[j] = false; } } - outV.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java index 62860df..11ee38c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ListIndexColScalar.java @@ -53,7 +53,10 @@ public void evaluate(VectorizedRowBatch batch) { ListColumnVector listV = (ListColumnVector) batch.cols[listColumnNum]; ColumnVector childV = listV.child; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + if (listV.isRepeating) { if (listV.isNull[0]) { outV.isNull[0] = true; @@ -63,8 +66,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[0] = true; outV.noNulls = false; } else { - outV.setElement(0, (int) (listV.offsets[0] + index), childV); outV.isNull[0] = false; + outV.setElement(0, (int) (listV.offsets[0] + index), childV); } } outV.isRepeating = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java index 242fddc..789a01d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongColEqualLongScalar extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -45,6 +47,12 @@ public LongColEqualLongScalar() { @Override public void evaluate(VectorizedRowBatch batch) { + // return immediately if batch is empty + final int n = batch.size; + if (n == 0) { + return; + } + if (childExpressions != null) { super.evaluateChildren(batch); } @@ -52,55 +60,82 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = 
inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; - int n = batch.size; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; - // return immediately if batch is empty - if (n == 0) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] == value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; return; } - outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = vector[0] == value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] == value ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] == value ? 
1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" - outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] == value ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] == value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] == value ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((vector[i] - value) ^ (value - vector[i])) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java index 633015e..7f622b4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongColGreaterEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully 
changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] >= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] >= value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" - outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] >= value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] >= value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] >= value ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] >= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 25c07df..eec3b89 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongColGreaterLongScalar extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColGreaterLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -52,8 +54,8 @@ 
public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] > value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] > value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" - outputVector[i] = (value - vector[i]) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] > value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] > value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] > value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] > value ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 1e5b349..d2826c2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongColLessEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] <= value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] <= value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" - outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] <= value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] <= value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] <= value ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] <= value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index 2f282a9..a7a9965 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColLessLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector 
= (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] < value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] < value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" - outputVector[i] = (vector[i] - value) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] < value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] < value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] < value ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] < value ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 0e78f8d..0e76019 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongColNotEqualLongScalar(int colNum, long value, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = vector[0] != value ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = vector[i] != value ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" - outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = vector[0] != value ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = vector[i] != value ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = vector[i] != value ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] != value ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java index 6c5bb68..d7be120 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColumnInList.java @@ -64,8 +64,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -75,49 +75,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column 
reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 
1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 8d915c2..b767a54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = vector[0] == value ? 1 : 0; + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - outputVector[0] = value == vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value == vector[i] ? 1 : 0; + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value == vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" - outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; - } - } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value == vector[0] ? 1 : 0; - outNulls[0] = false; + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value == vector[i] ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a == b" is "(((a - b) ^ (b - a)) >>> 63) ^ 1" + outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value == vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (((value - vector[i]) ^ (vector[i] - value)) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index a06fb08..0279cc1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -25,8 +27,8 @@ public class LongScalarGreaterEqualLongColumn extends VectorExpression { private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public 
LongScalarGreaterEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -52,8 +54,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -63,44 +65,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value >= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value >= vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" - outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value >= vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... 
+ if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value >= vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value >= vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a >= b" is "((a - b) >>> 63) ^ 1" + outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value >= vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((value - vector[i]) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index 6610288..4a0a376 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private int colNum; - private long value; + protected int colNum; + protected long value; public LongScalarGreaterLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value > vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value > vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a > b" is "(b - a) >>> 63" - outputVector[i] = (vector[i] - value) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value > vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value > vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value > vector[i] ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a > b" is "(b - a) >>> 63" + outputVector[i] = (vector[i] - value) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value > vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (vector[i] - value) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index 7a305d3..eb8973f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarLessEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { 
LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value <= vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value <= vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" - outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value <= vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value <= vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value <= vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a <= b" is "((b - a) >>> 63) ^ 1" + outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value <= vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) >>> 63) ^ 1; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index 763dfdf..0b71893 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarLessLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value < vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value < vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a < b" is "(a - b) >>> 63" - outputVector[i] = (value - vector[i]) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value < vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value < vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value < vector[i] ? 
1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a < b" is "(a - b) >>> 63" + outputVector[i] = (value - vector[i]) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value < vector[i] ? 1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = (value - vector[i]) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java index aecaed2..f5180a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -26,8 +28,8 @@ private static final long serialVersionUID = 1L; - private final int colNum; - private final long value; + protected final int colNum; + protected final long value; public LongScalarNotEqualLongColumn(long value, int colNum, int outputColumnNum) { super(outputColumnNum); @@ -53,8 +55,8 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector 
inputColVector = (LongColumnVector) batch.cols[colNum]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -64,44 +66,76 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; outputVector[0] = value != vector[0] ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outputVector[i] = value != vector[i] ? 1 : 0; - } } else { - for(int i = 0; i != n; i++) { - // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" - outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; - } + outputIsNull[0] = true; + outputColVector.noNulls = false; } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = value != vector[0] ? 1 : 0; - outNulls[0] = false; + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + + // Carefully handle NULLs... + if (!outputColVector.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputIsNull[i] = false; + outputVector[i] = value != vector[i] ? 1 : 0; + } + } else { + Arrays.fill(outputIsNull, 0, n, false); + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outputVector[i] = value != vector[i] ? 1 : 0; + } } else { - outNulls[0] = true; + for(int i = 0; i != n; i++) { + // The SIMD optimized form of "a != b" is "((a - b) ^ (b - a)) >>> 63" + outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; + } } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value != vector[i] ? 
1 : 0; - outNulls[i] = nullPos[i]; + outputIsNull[i] = inputIsNull[i]; } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = ((vector[i] - value) ^ (value - vector[i])) >>> 63; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java index c52e337..a9e2691 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java @@ -65,35 +65,47 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; + + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. + outputIsNull[0] = false; func(outV, vector, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; func(outV, vector, i); } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outputIsNull[i] = false; func(outV, vector, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... // Handle case with nulls. Don't do function if the value is null, // because the data may be undefined for a null value. 
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -109,7 +121,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java index ccc0fcb..3615de3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -74,38 +76,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + cleanup(outputColVector, sel, batch.selectedInUse, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = func(vector[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } cleanup(outputColVector, sel, batch.selectedInUse, n); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java index 3375a56..f57d487 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToDouble.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -73,38 +75,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. - outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + cleanup(outputColVector, sel, batch.selectedInUse, n); + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = func(vector[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } - outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } cleanup(outputColVector, sel, batch.selectedInUse, n); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java index 898cf96..fbacb0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncLongToLong.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -72,38 +74,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.isRepeating) { - outputVector[0] = func(vector[0]); + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
- outputIsNull[0] = inputIsNull[0]; + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + outputIsNull[0] = false; + outputVector[0] = func(vector[0]); + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } outputColVector.isRepeating = true; - } else if (inputColVector.noNulls) { + return; + } + + if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = func(vector[i]); } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } } outputColVector.isRepeating = false; - } else /* there are nulls */ { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = func(vector[i]); outputIsNull[i] = inputIsNull[i]; - } + outputVector[i] = func(vector[i]); + } } else { + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { outputVector[i] = func(vector[i]); } - System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index 30f20f3..be69f7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -54,45 +56,61 @@ public void evaluate(VectorizedRowBatch batch) { long[] vector = 
inputColVector.vector; LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n <= 0) { // Nothing to do, this is EOF return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; // 0 XOR 1 yields 1, 1 XOR 1 yields 0 outputVector[0] = vector[0] ^ 1; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outputVector[i] = vector[i] ^ 1; } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = vector[i] ^ 1; } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop.
+ */ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outputVector[0] = vector[0] ^ 1; - outV.isNull[0] = inputColVector.isNull[0]; - } else if (batch.selectedInUse) { - outV.isRepeating = false; + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } } else { - outV.isRepeating = false; for (int i = 0; i != n; i++) { outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java index bfd7334..6450a0a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java @@ -60,8 +60,11 @@ public void evaluate(VectorizedRowBatch batch) { return; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + if (inputColVector.noNulls) { - outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; resultLen[0] = length[0]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java index 20a0a37..db684c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectStringColLikeStringScalar.java @@ -15,10 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - + package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor; import org.apache.hadoop.hive.ql.exec.vector.expressions.AbstractFilterStringColLikeStringScalar.Checker; @@ -70,42 +71,50 @@ public void evaluate(VectorizedRowBatch batch) { LongColumnVector outV = (LongColumnVector) batch.cols[outputColumnNum]; long[] outputVector = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; // return immediately if batch is empty if (n == 0) { return; } - outV.noNulls = inputColVector.noNulls; - outV.isRepeating = inputColVector.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0); - outV.isNull[0] = false; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); outV.isNull[i] = false; + outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); } } else { + Arrays.fill(outV.isNull, 0, n, false); for (int i = 0; i != n; i++) { outputVector[i] = (checker.check(vector[i], start[i], length[i]) ? 1 : 0); - outV.isNull[i] = false; } } - } else { - if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero. Repeating property will not change.
- if (!nullPos[0]) { - outputVector[0] = (checker.check(vector[0], start[0], length[0]) ? 1 : 0); - outV.isNull[0] = false; - } else { - outputVector[0] = LongColumnVector.NULL_VALUE; - outV.isNull[0] = true; - } - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + + if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; if (!nullPos[i]) { @@ -114,6 +123,7 @@ public void evaluate(VectorizedRowBatch batch) { } else { outputVector[i] = LongColumnVector.NULL_VALUE; outV.isNull[i] = true; + outV.noNulls = false; } } } else { @@ -124,11 +134,12 @@ public void evaluate(VectorizedRowBatch batch) { } else { outputVector[i] = LongColumnVector.NULL_VALUE; outV.isNull[i] = true; + outV.noNulls = false; } } } } - } + } private Checker borrowChecker() { FilterStringColLikeStringScalar fil = new FilterStringColLikeStringScalar(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java index c889ac1..60040a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringColumnInList.java @@ -74,57 +74,65 @@ public void evaluate(VectorizedRowBatch batch) { BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[inputCol]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; int n = batch.size; byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] len = inputColVector.length; long[] outputVector = outputColVector.vector; + boolean[] outputIsNull = outputColVector.isNull; // return immediately if batch is empty if (n == 0) { return; } 
- outputColVector.isRepeating = inputColVector.isRepeating; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { + // We do not need to do a column reset since we are carefully changing the output. + outputColVector.isRepeating = false; - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { + } else /* there are nulls in the inputColVector */ { - // All must be selected otherwise size would be zero - // Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.lookup(vector[0], start[0], len[0]) ? 1 : 0; - } - outputColVector.isNull[0] = nullPos[0]; - } else if (batch.selectedInUse) { + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - if (!nullPos[i]) { + outputColVector.isNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ?
1 : 0; } - outputColVector.isNull[i] = nullPos[i]; } } else { - System.arraycopy(nullPos, 0, outputColVector.isNull, 0, n); + System.arraycopy(inputIsNull, 0, outputColVector.isNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i], start[i], len[i]) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index f730c9d..ccd947c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -62,6 +63,8 @@ public void evaluate(VectorizedRowBatch batch) { byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { @@ -72,53 +75,60 @@ public void evaluate(VectorizedRowBatch batch) { // initialize output vector buffer to receive data outV.initBuffer(); - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. */ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index cbdcc76..b202b4e 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -74,6 +74,11 @@ public void evaluate(VectorizedRowBatch batch) { // prepare output buffer to accept results outV.initBuffer(); + if (!outV.noNulls) { + // TEMPORARILY: + outV.reset(); + } + /* Handle default case for isRepeating setting for output. This will be set to true * later in the special cases where that is necessary. */ @@ -81,7 +86,7 @@ public void evaluate(VectorizedRowBatch batch) { if (inV1.noNulls && !inV2.noNulls) { - // propagate nulls + // Carefully handle NULLs... /* We'll assume that there *may* be nulls in the input if !noNulls is true * for an input vector. This is to be more forgiving of errors in loading @@ -89,6 +94,7 @@ public void evaluate(VectorizedRowBatch batch) { * isNull[0] is set if !noNulls and isRepeating are true for the vector. */ outV.noNulls = false; + if (inV2.isRepeating) { if (inV2.isNull[0]) { @@ -321,8 +327,9 @@ public void evaluate(VectorizedRowBatch batch) { } } else { // there are no nulls in either input vector - // propagate null information - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ // perform data operation if (inV1.isRepeating && inV2.isRepeating) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index 9b9c063..e537f8c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -60,59 +62,64 @@ public void evaluate(VectorizedRowBatch batch) { int [] start = inputColVector.start; int [] length = inputColVector.length; long[] resultLen = outV.vector; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { //Nothing to do return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - /* - * Handle case with nulls. Don't do function if the value is null, to save time, - * because calling the function can be expensive. - */ + // Carefully handle NULLs... outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; } outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { + outV.isNull[i] = inputColVector.isNull[i]; if (!inputColVector.isNull[i]) { resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); } - outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index 94fbef8..6f75a91 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -62,6 +63,8 @@ public void evaluate(VectorizedRowBatch batch) { byte[][] vector = inputColVector.vector; int[] start = inputColVector.start; int[] length = inputColVector.length; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; if (n == 0) { @@ -72,37 +75,46 @@ public void evaluate(VectorizedRowBatch batch) { // initialize output vector buffer to receive data outV.initBuffer(); - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isRepeating = false; } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... /* * Handle case with nulls. Don't do function if the value is null, to save time, * because calling the function can be expensive. */ outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; if (!inputColVector.isNull[i]) { @@ -110,7 +122,6 @@ public void evaluate(VectorizedRowBatch batch) { } outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } else { for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { @@ -118,7 +129,6 @@ public void evaluate(VectorizedRowBatch batch) { } outV.isNull[i] = inputColVector.isNull[i]; } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 5934f6f..de416a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -139,15 +139,17 @@ public void evaluate(VectorizedRowBatch batch) { int[] start = inV.start; outV.initBuffer(); + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + if (inV.isRepeating) { - outV.isRepeating = true; if (!inV.noNulls && inV.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); return; } else { - outV.noNulls = true; + outV.isNull[0] = false; int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx); if (offset != -1) { outV.setVal(0, vector[0], offset, len[0] - (offset - start[0])); @@ -155,58 +157,55 @@ public void evaluate(VectorizedRowBatch batch) { outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } } - } else { - outV.isRepeating = false; - if (batch.selectedInUse) { - if (!inV.noNulls) { - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - if (!inV.isNull[selected]) { - int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], - startIdx); - outV.isNull[selected] = false; - if (offset != -1) { - outV.setVal(selected, vector[selected], offset, - len[selected] - (offset - start[selected])); - } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } else { - outV.isNull[selected] = true; - } - } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; + outV.isRepeating = true; + return; + } + + if (batch.selectedInUse) { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outV.noNulls = false; + + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + if (!inV.isNull[selected]) { int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], startIdx); + outV.isNull[selected] = false; if (offset != -1) { outV.setVal(selected, vector[selected], offset, len[selected] - (offset - start[selected])); } else { outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outV.isNull[selected] = true; } } } else { - if (!inV.noNulls) { - outV.noNulls = false; - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - for (int i = 0; i != n; ++i) { - if (!inV.isNull[i]) { - int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); - if (offset != -1) { - outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); - } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + outV.isNull[selected] = false; + int offset = getSubstrStartOffset(vector[selected], start[selected], len[selected], + startIdx); + if (offset != -1) { + outV.setVal(selected, vector[selected], offset, + len[selected] - (offset - start[selected])); + } else { + outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { + } + } + } else { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outV.noNulls = false; + + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for (int i = 0; i != n; ++i) { + if (!inV.isNull[i]) { int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); if (offset != -1) { outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); @@ -215,6 +214,16 @@ public void evaluate(VectorizedRowBatch batch) { } } } + } else { + for (int i = 0; i != n; ++i) { + outV.isNull[i] = false; + int offset = getSubstrStartOffset(vector[i], start[i], len[i], startIdx); + if (offset != -1) { + outV.setVal(i, vector[i], offset, len[i] - (offset - start[i])); + } else { + outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); + } + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 9d6eccf..c9338ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; +import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -160,15 +161,17 @@ public void evaluate(VectorizedRowBatch batch) { int[] start = inV.start; outV.initBuffer(); + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + if (inV.isRepeating) { - outV.isRepeating = true; + if (!inV.noNulls && inV.isNull[0]) { outV.isNull[0] = true; outV.noNulls = false; outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); - return; } else { - outV.noNulls = true; + outV.isNull[0] = false; populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray); if (offsetArray[0] != -1) { outV.setVal(0, vector[0], offsetArray[0], offsetArray[1]); @@ -176,30 +179,19 @@ public void evaluate(VectorizedRowBatch batch) { outV.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } } - } else { - outV.isRepeating = false; - if (batch.selectedInUse) { - if (!inV.noNulls) { - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; - if (!inV.isNull[selected]) { - outV.isNull[selected] = false; - populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, - length, offsetArray); - if (offsetArray[0] != -1) { - outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); - } else { - outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } else { - outV.isNull[selected] = true; - } - } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - int selected = sel[i]; + outV.isRepeating = true; + return; + } + + if (batch.selectedInUse) { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outV.noNulls = false; + + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + if (!inV.isNull[selected]) { outV.isNull[selected] = false; populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, length, offsetArray); @@ -208,26 +200,32 @@ public void evaluate(VectorizedRowBatch batch) { } else { outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outV.isNull[selected] = true; } } } else { - if (!inV.noNulls) { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - outV.noNulls = false; - for (int i = 0; i != n; ++i) { - if (!inV.isNull[i]) { - populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); - if (offsetArray[0] != -1) { - outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); - } else { - outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); - } - } + for (int i = 0; i != n; ++i) { + int selected = sel[i]; + outV.isNull[selected] = false; + populateSubstrOffsets(vector[selected], start[selected], len[selected], startIdx, + length, offsetArray); + if (offsetArray[0] != -1) { + outV.setVal(selected, vector[selected], offsetArray[0], offsetArray[1]); + } else { + outV.setVal(selected, EMPTY_STRING, 0, EMPTY_STRING.length); } - } else { - outV.noNulls = true; - for (int i = 0; i != n; ++i) { - outV.isNull[i] = false; + } + } + } else { + if (!inV.noNulls) /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
+ outV.noNulls = false; + + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for (int i = 0; i != n; ++i) { + if (!inV.isNull[i]) { populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); if (offsetArray[0] != -1) { outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); @@ -236,6 +234,16 @@ public void evaluate(VectorizedRowBatch batch) { } } } + } else { + Arrays.fill(outV.isNull, 0, n, false); + for (int i = 0; i != n; ++i) { + populateSubstrOffsets(vector[i], start[i], len[i], startIdx, length, offsetArray); + if (offsetArray[0] != -1) { + outV.setVal(i, vector[i], offsetArray[0], offsetArray[1]); + } else { + outV.setVal(i, EMPTY_STRING, 0, EMPTY_STRING.length); + } + } } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 544b700..894bdae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -72,6 +72,8 @@ public void evaluate(VectorizedRowBatch batch) { int [] start = inputColVector.start; int [] length = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); Text t; @@ -86,26 +88,37 @@ public void evaluate(VectorizedRowBatch batch) { // It's implemented in the simplest way now, just calling the // existing built-in function. - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; s.set(vector[0], start[0], length[0]); t = func.evaluate(s); setString(outV, 0, t); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; /* Fill output isNull with false for selected elements since there is a chance we'll * convert to noNulls == false in setString(); */ - outV.isNull[i] = false; + outputIsNull[i] = false; s.set(vector[i], start[i], length[i]); t = func.evaluate(s); setString(outV, i, t); } - outV.isRepeating = false; } else { // Set all elements to not null. The setString call can override this. @@ -115,21 +128,13 @@ public void evaluate(VectorizedRowBatch batch) { t = func.evaluate(s); setString(outV, i, t); } - outV.isRepeating = false; } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... 
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; // setString can override this - if (!inputColVector.isNull[0]) { - s.set(vector[0], start[0], length[0]); - t = func.evaluate(s); - setString(outV, 0, t); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; // setString can override this @@ -139,7 +144,6 @@ public void evaluate(VectorizedRowBatch batch) { setString(outV, i, t); } } - outV.isRepeating = false; } else { // setString can override this null propagation @@ -151,7 +155,6 @@ public void evaluate(VectorizedRowBatch batch) { setString(outV, i, t); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 2f8b627..6cd003a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -60,6 +62,8 @@ public void evaluate(VectorizedRowBatch batch) { int start[] = inputColVector.start; int length[] = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); if (n == 0) { @@ -67,35 +71,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if 
(inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; func(outV, vector, start, length, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes its mind. + outputIsNull[i] = false; func(outV, vector, start, length, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, vector, start, length, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... 
outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, vector, start, length, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -103,7 +115,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, start, length, i); } } - outV.isRepeating = false; } else { System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { @@ -111,7 +122,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, vector, start, length, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java index 7fb95f5..bc50a7d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampColumnInList.java @@ -73,8 +73,8 @@ public void evaluate(VectorizedRowBatch batch) { TimestampColumnVector inputColVector = (TimestampColumnVector) batch.cols[inputCol]; LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNulls = outputColVector.isNull; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; int n = batch.size; long[] outputVector = outputColVector.vector; @@ -83,49 +83,52 @@ public void evaluate(VectorizedRowBatch batch) { return; } + // We do not need to do a column reset since we are carefully changing the output. 
outputColVector.isRepeating = false; - outputColVector.noNulls = inputColVector.noNulls; - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - // All must be selected otherwise size would be zero - // Repeating property will not change. + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes its mind. + outputIsNull[0] = false; outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0; - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + } + outputColVector.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + outputIsNull[i] = false; outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } else { + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } - } else { - if (inputColVector.isRepeating) { - - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!nullPos[0]) { - outputVector[0] = inSet.contains(inputColVector.asScratchTimestamp(0)) ? 1 : 0; - outNulls[0] = false; - } else { - outNulls[0] = true; - } - outputColVector.isRepeating = true; - } else if (batch.selectedInUse) { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... + outputColVector.noNulls = false; + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outNulls[i] = nullPos[i]; - if (!nullPos[i]) { + outputIsNull[i] = inputIsNull[i]; + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 
1 : 0; } } } else { - System.arraycopy(nullPos, 0, outNulls, 0, n); + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); for(int i = 0; i != n; i++) { - if (!nullPos[i]) { + if (!inputIsNull[i]) { outputVector[i] = inSet.contains(inputColVector.asScratchTimestamp(i)) ? 1 : 0; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java index 5eb2090..f1be8c7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TimestampToStringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -57,6 +59,8 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumnNum]; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outV.isNull; outV.initBuffer(); if (n == 0) { @@ -64,35 +68,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputIsNull[0]) { + // Set isNull before call in case it changes it mind. 
+ outputIsNull[0] = false; func(outV, inputColVector, 0); - } else if (batch.selectedInUse) { + } else { + outputIsNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + + if (inputColVector.noNulls) { + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; + // Set isNull before call in case it changes it mind. + outputIsNull[i] = false; func(outV, inputColVector, i); } - outV.isRepeating = false; } else { + // Set isNull before calls in case they change their mind. + Arrays.fill(outputIsNull, 0, n, false); for(int i = 0; i != n; i++) { func(outV, inputColVector, i); } - outV.isRepeating = false; } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, - // because the data may be undefined for a null value. + // Carefully handle NULLs... outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - func(outV, inputColVector, 0); - } - } else if (batch.selectedInUse) { + + if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.isNull[i] = inputColVector.isNull[i]; @@ -108,7 +120,6 @@ public void evaluate(VectorizedRowBatch batch) { func(outV, inputColVector, i); } } - outV.isRepeating = false; } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java index ea78a2e..1e36048 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorCoalesce.java @@ -57,13 +57,13 @@ public void evaluate(VectorizedRowBatch batch) { int[] sel = batch.selected; int n = batch.size; - ColumnVector outputVector = batch.cols[outputColumnNum]; + ColumnVector outputColVector = batch.cols[outputColumnNum]; if (n 
<= 0) { // Nothing to do return; } - outputVector.init(); + outputColVector.init(); boolean noNulls = false; @@ -74,44 +74,52 @@ public void evaluate(VectorizedRowBatch batch) { noNulls = noNulls || cv.noNulls; } - outputVector.noNulls = noNulls; - outputVector.isRepeating = false; + if (!outputColVector.noNulls) { + // TEMPORARILY: + outputColVector.reset(); + } + + outputColVector.isRepeating = false; + + // Carefully handle NULLs... + // TEMPORARILY: Assume the worst... + outputColVector.noNulls = false; ColumnVector first = batch.cols[inputColumns[0]]; if (first.noNulls && first.isRepeating) { - outputVector.isRepeating = true; - outputVector.isNull[0] = false; - outputVector.setElement(0, 0, first); + outputColVector.isRepeating = true; + outputColVector.isNull[0] = false; + outputColVector.setElement(0, 0, first); } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector.isNull[i] = true; + outputColVector.isNull[i] = true; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; if ( (cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, 0, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, 0, cv); break; } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, i, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, i, cv); break; } } } } else { for (int i = 0; i != n; i++) { - outputVector.isNull[i] = true; + outputColVector.isNull[i] = true; for (int k = 0; k < inputColumns.length; k++) { ColumnVector cv = batch.cols[inputColumns[k]]; if ((cv.isRepeating) && (cv.noNulls || !cv.isNull[0])) { - outputVector.isNull[i] = false; - outputVector.setElement(i, 0, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, 0, cv); break; } else if ((!cv.isRepeating) && (cv.noNulls || !cv.isNull[i])) { - 
outputVector.isNull[i] = false; - outputVector.setElement(i, i, cv); + outputColVector.isNull[i] = false; + outputColVector.setElement(i, i, cv); break; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index e232555..24304e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -32,6 +32,7 @@ import org.apache.hive.common.util.DateParser; import java.sql.Date; +import java.util.Arrays; public class VectorUDFDateAddColScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -89,27 +90,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; switch (primitiveCategory) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateDate(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateDate(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -130,22 +145,36 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (batch.selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateTimestamp(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -168,22 +197,36 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluateString(inputCol, outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (batch.selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluateString(inputCol, outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluateString(inputCol, outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. 
Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (batch.selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 0aaba26..c0fa1bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -30,6 +30,7 @@ import java.nio.charset.StandardCharsets; import java.sql.Date; import java.sql.Timestamp; +import java.util.Arrays; public class VectorUDFDateAddScalarCol extends VectorExpression { @@ -130,26 +131,43 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. + outV.isRepeating = false; long baseDateDays = DateWritable.millisToDays(baseDate.getTime()); + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluate(baseDateDays, inputCol.vector[0], outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluate(baseDateDays, inputCol.vector[i], outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluate(baseDateDays, inputCol.vector[i], outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. 
Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 97e3669..2a0fd28 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -35,6 +35,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Arrays; public class VectorUDFDateDiffColScalar extends VectorExpression { private static final long serialVersionUID = 1L; @@ -92,8 +93,8 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; PrimitiveCategory primitiveCategory1 = ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory(); switch (primitiveCategory1) { @@ -134,22 +135,36 @@ public void evaluate(VectorizedRowBatch batch) { PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory(); switch (primitiveCategory0) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateDate(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateDate(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -170,22 +185,36 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateTimestamp(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -208,22 +237,36 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluateString(inputCol, outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluateString(inputCol, outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluateString(inputCol, outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. 
Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index c575c05..85cf72d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -33,6 +33,7 @@ import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Arrays; public class VectorUDFDateDiffScalarCol extends VectorExpression { private static final long serialVersionUID = 1L; @@ -90,8 +91,8 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; PrimitiveCategory primitiveCategory0 = ((PrimitiveTypeInfo) inputTypeInfos[0]).getPrimitiveCategory(); @@ -134,22 +135,36 @@ public void evaluate(VectorizedRowBatch batch) { ((PrimitiveTypeInfo) inputTypeInfos[1]).getPrimitiveCategory(); switch (primitiveCategory1) { case DATE: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateDate(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateDate(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateDate(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. 
outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -170,22 +185,36 @@ public void evaluate(VectorizedRowBatch batch) { break; case TIMESTAMP: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = evaluateTimestamp(inputCol, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = evaluateTimestamp(inputCol, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = evaluateTimestamp(inputCol, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; @@ -208,22 +237,36 @@ public void evaluate(VectorizedRowBatch batch) { case STRING: case CHAR: case VARCHAR: - if (inputCol.noNulls) { - outV.noNulls = true; + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + outV.isNull[0] = false; + evaluateString(inputCol, outV, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + } else if (inputCol.noNulls) { if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; evaluateString(inputCol, outV, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { evaluateString(inputCol, outV, i); } } - } else { + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs.. + // Handle case with nulls. 
Don't do function if the value is null, to save time, // because calling the function can be expensive. outV.noNulls = false; + if (selectedInUse) { for(int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java index 9d72bdf..d4805dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseCol.java @@ -55,7 +55,10 @@ public void evaluate(VectorizedRowBatch batch) { // indexColumnVector includes the keys of Map indexColumnVector = batch.cols[indexColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -71,9 +74,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = false; } else { // the key is found in MapColumnVector, set the value - outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); outV.isNull[0] = false; - outV.noNulls = true; + outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); } outV.isRepeating = true; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java index e6a86ae..9df891c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFMapIndexBaseScalar.java @@ -50,7 +50,10 @@ public void evaluate(VectorizedRowBatch batch) { ColumnVector outV = batch.cols[outputColumnNum]; MapColumnVector mapV = (MapColumnVector) batch.cols[mapColumnNum]; - outV.noNulls = true; + /* + * Do careful maintenance 
of the outputColVector.noNulls flag. + */ + int[] mapValueIndex; if (mapV.isRepeating) { if (mapV.isNull[0]) { @@ -65,7 +68,6 @@ public void evaluate(VectorizedRowBatch batch) { } else { // the key is found in MapColumnVector, set the value outV.setElement(0, (int) (mapV.offsets[0] + mapValueIndex[0]), mapV.values); - outV.noNulls = true; } } outV.isRepeating = true; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java index 0507fa5..c3eca0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFStructField.java @@ -58,8 +58,8 @@ public void evaluate(VectorizedRowBatch batch) { outV.isNull[0] = true; outV.noNulls = false; } else { - outV.setElement(0, 0, fieldColumnVector); outV.isNull[0] = false; + outV.setElement(0, 0, fieldColumnVector); } outV.isRepeating = true; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java index 519a4e4..411a33f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldDate.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.Calendar; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -95,27 +96,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputColVec.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; LongColumnVector longColVector = (LongColumnVector) inputColVec; + if (inputColVec.isRepeating) { + if (inputColVec.noNulls || !inputColVec.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = getDateField(longColVector.vector[0]); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = getDateField(longColVector.vector[i]); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = getDateField(longColVector.vector[i]); } } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... outV.noNulls = false; + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index c5762d1..2918546 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -106,11 +106,27 @@ public void evaluate(VectorizedRowBatch batch) { return; } - // true for all algebraic UDFs with no state - outV.isRepeating = inputCol.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; + + if (inputCol.isRepeating) { + if (inputCol.noNulls || !inputCol.isNull[0]) { + try { + outV.isNull[0] = false; + outV.vector[0] = getField(inputCol.vector[0], inputCol.start[0], inputCol.length[0]); + } catch (ParseException e) { + outV.noNulls = false; + outV.isNull[0] = true; + } + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } if (inputCol.noNulls) { - outV.noNulls = true; if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; @@ -133,11 +149,11 @@ public void evaluate(VectorizedRowBatch batch) { } } } - } else { + } else /* there are nulls in the inputColVector */ { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + // Carefully handle NULLs... outV.noNulls = false; + if (selectedInUse) { for (int j = 0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java index 54cb5d8..740a00c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldTimestamp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; import java.util.Calendar; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -95,27 +96,41 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* true for all algebraic UDFs with no state */ - outV.isRepeating = inputColVec.isRepeating; + // We do not need to do a column reset since we are carefully changing the output. 
+ outV.isRepeating = false; TimestampColumnVector timestampColVector = (TimestampColumnVector) inputColVec; + if (inputColVec.isRepeating) { + if (inputColVec.noNulls || !inputColVec.isNull[0]) { + outV.isNull[0] = false; + outV.vector[0] = getTimestampField(timestampColVector, 0); + } else { + outV.isNull[0] = true; + outV.noNulls = false; + } + outV.isRepeating = true; + return; + } + if (inputColVec.noNulls) { - outV.noNulls = true; if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; + outV.isNull[i] = false; outV.vector[i] = getTimestampField(timestampColVector, i); } } else { + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i < n; i++) { outV.vector[i] = getTimestampField(timestampColVector, i); } } - } else { - // Handle case with nulls. Don't do function if the value is null, to save time, - // because calling the function can be expensive. + } else /* there are nulls in the inputColVector */ { + + // Carefully handle NULLs... outV.noNulls = false; + if (selectedInUse) { for(int j=0; j < n; j++) { int i = sel[j]; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java index 6ebd7d3..18bacc5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java @@ -151,7 +151,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation) agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { valueProcessor.processValue(myagg, inputColumn, 0); } return; @@ -251,7 +251,11 @@ public void aggregateInputSelection( } } else { if (inputColumn.isRepeating) { - // All nulls, no-op for min/max + if (!inputColumn.isNull[0]) { + 
iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, aggregateIndex, + inputColumn, batchSize); + } } else { if (batch.selectedInUse) { iterateHasNullsSelectionWithAggregationSelection( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java index 8f1375e..b08bc32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilterMerge.java @@ -123,7 +123,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation) agg; if (inputColumn.isRepeating) { - if (inputColumn.noNulls) { + if (inputColumn.noNulls || !inputColumn.isNull[0]) { processValue(myagg, inputColumn, 0); } return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java index 888f5f0..77e751d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFCountMerge.java @@ -284,9 +284,9 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation)agg; long[] vector = inputVector.vector; - + if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { myagg.value += vector[0]*batchSize; } return; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java index 251de3a..a503445 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64.java @@ -333,7 +333,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java index 06e319b..ddc190c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumDecimal64ToDecimal.java @@ -358,7 +358,7 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) long[] vector = inputVector.vector; if (inputVector.isRepeating) { - if (inputVector.noNulls) { + if (inputVector.noNulls || !inputVector.isNull[0]) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java index 3ca5ee0..e542033 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFSumTimestamp.java @@ -297,13 +297,13 @@ public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) Aggregation myagg = (Aggregation)agg; if (inputVector.isRepeating) { - if (inputVector.noNulls) { - if (myagg.isNull) { - myagg.isNull = false; - 
myagg.sum = 0; + if (inputVector.noNulls || !inputVector.isNull[0]) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + } + myagg.sum += inputVector.getDouble(0) * batchSize; } - myagg.sum += inputVector.getDouble(0) * batchSize; - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java index 7166c64..9515832 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorCount.java @@ -62,7 +62,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } ColumnVector colVector = batch.cols[inputColumnNum]; if (colVector.isRepeating) { - if (colVector.noNulls) { + if (colVector.noNulls || !colVector.isNull[0]) { count += size; } } else if (colVector.noNulls) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java index 85e5ebe..bd4896a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalAvg.java @@ -72,7 +72,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { // We have a repeated value. The sum increases by value * batch.size. 
temp.setFromLong(batch.size); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java index ed11a09..daba90c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalFirstValue.java @@ -70,7 +70,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { firstValue.set(decimalColVector.vector[0]); isGroupResultNull = false; } @@ -86,6 +87,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
DecimalColumnVector outputColVector = (DecimalColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -93,7 +98,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0].set(firstValue); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java index eb55792..b2cbdf6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalLastValue.java @@ -70,7 +70,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { lastValue.set(decimalColVector.vector[0]); isGroupResultNull = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java index 231cf9b..4d8c7fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMax.java @@ -64,7 +64,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { if (isGroupResultNull) 
{ max.set(decimalColVector.vector[0]); isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java index 6373c09..312c43c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalMin.java @@ -64,7 +64,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { if (isGroupResultNull) { min.set(decimalColVector.vector[0]); isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java index 36dd119..e899c36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDecimalSum.java @@ -66,7 +66,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DecimalColumnVector decimalColVector = ((DecimalColumnVector) batch.cols[inputColumnNum]); if (decimalColVector.isRepeating) { - if (decimalColVector.noNulls) { + if (decimalColVector.noNulls || !decimalColVector.isNull[0]) { temp.setFromLong(batch.size); if (isGroupResultNull) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java index 9ceeb13..e457e32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -51,7 +51,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; - longColVector.noNulls = true; longColVector.isNull[0] = false; longColVector.vector[0] = denseRank; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java index 271a936..298de2d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleAvg.java @@ -66,7 +66,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { // We have a repeated value. The sum increases by value * batch.size. 
if (isGroupResultNull) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java index 9f65de4..460fbe5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleFirstValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { firstValue = doubleColVector.vector[0]; isGroupResultNull = false; } @@ -82,6 +83,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -89,7 +94,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0] = firstValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java index 8d28994..51d06a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleLastValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { lastValue = doubleColVector.vector[0]; isGroupResultNull = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java index 732369a..2bc4c6b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMax.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { if (isGroupResultNull) { max = 
doubleColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java index 91c538d..6a422b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleMin.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { if (isGroupResultNull) { min = doubleColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java index 0c534d8..2ecc4ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDoubleSum.java @@ -61,7 +61,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc DoubleColumnVector doubleColVector = ((DoubleColumnVector) batch.cols[inputColumnNum]); if (doubleColVector.isRepeating) { - if (doubleColVector.noNulls) { + if (doubleColVector.noNulls || !doubleColVector.isNull[0]) { if (isGroupResultNull) { // First aggregation calculation for group. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java index 66e8f98..f48df25 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongAvg.java @@ -66,7 +66,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + if (longColVector.noNulls || !longColVector.isNull[0]) { // We have a repeated value. The sum increases by value * batch.size. if (isGroupResultNull) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java index 5151ecb..3deadb1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongFirstValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { firstValue = longColVector.vector[0]; isGroupResultNull = false; } @@ -82,6 +83,10 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc haveFirstValue = true; } + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + // First value is repeated for all batches. 
LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumnNum]; outputColVector.isRepeating = true; @@ -89,7 +94,6 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc outputColVector.noNulls = false; outputColVector.isNull[0] = true; } else { - outputColVector.noNulls = true; outputColVector.isNull[0] = false; outputColVector.vector[0] = firstValue; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java index fa8e880..0a5df51 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongLastValue.java @@ -66,7 +66,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { lastValue = longColVector.vector[0]; isGroupResultNull = false; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java index b60b03b..0e7eb07 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMax.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { if (isGroupResultNull) { max = longColVector.vector[0]; isGroupResultNull = false; diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java index 26ea0df..df31c51 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongMin.java @@ -60,7 +60,8 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { if (isGroupResultNull) { min = longColVector.vector[0]; isGroupResultNull = false; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java index ce0acb5..9402218 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorLongSum.java @@ -61,7 +61,7 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); if (longColVector.isRepeating) { - if (longColVector.noNulls) { + if (longColVector.noNulls || !longColVector.isNull[0]) { if (isGroupResultNull) { // First aggregation calculation for group. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java index f7080e5..34add61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java @@ -50,9 +50,12 @@ public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression in public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) { evaluateInputExpr(batch); + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ + LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true; - longColVector.noNulls = true; longColVector.isNull[0] = false; longColVector.vector[0] = rank; groupCount += batch.size; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java index f23a8b3..607d34a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFGroupBatches.java @@ -178,6 +178,11 @@ public void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatc } private void fillGroupResults(VectorizedRowBatch batch) { + + /* + * Do careful maintenance of the outputColVector.noNulls flag. 
+ */ + for (VectorPTFEvaluatorBase evaluator : evaluators) { final int outputColumnNum = evaluator.getOutputColumnNum(); if (evaluator.streamsResult()) { @@ -190,7 +195,6 @@ private void fillGroupResults(VectorizedRowBatch batch) { if (isGroupResultNull) { outputColVector.noNulls = false; } else { - outputColVector.noNulls = true; switch (evaluator.getResultColumnVectorType()) { case LONG: ((LongColumnVector) outputColVector).vector[0] = evaluator.getLongGroupResult(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index a1a1282..82b7a15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -140,7 +140,9 @@ public void evaluate(VectorizedRowBatch batch) { return; } - batch.cols[outputColumnNum].noNulls = true; + /* + * Do careful maintenance of the outputColVector.noNulls flag. + */ /* If all input columns are repeating, just evaluate function * for row 0 in the batch and set output repeating. diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 190771e..81b8826 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -1701,7 +1701,7 @@ private boolean validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInf private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOperator tableScanOperator, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - LOG.info("Validating and vectorizing MapWork..."); + LOG.info("Validating and vectorizing MapWork... 
(vectorizedVertexNum " + vectorizedVertexNum + ")"); // Set "global" member indicating where to store "not vectorized" information if necessary. currentBaseWork = mapWork; @@ -1905,7 +1905,7 @@ private boolean validateAndVectorizeReduceOperators(ReduceWork reduceWork, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { - LOG.info("Validating and vectorizing ReduceWork..."); + LOG.info("Validating and vectorizing ReduceWork... (vectorizedVertexNum " + vectorizedVertexNum + ")"); Operator newVectorReducer; try { @@ -4101,9 +4101,6 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { for (int i = 0; i < size; i++) { ExprNodeDesc expr = colList.get(i); VectorExpression ve = vContext.getVectorExpression(expr); - if (ve.getOutputColumnNum() == -1) { - fake++; - } projectedOutputColumns[i] = ve.getOutputColumnNum(); if (ve instanceof IdentityExpression) { // Suppress useless evaluation. diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java index c646bf1..ea19e93 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -188,7 +188,6 @@ public void testLongColumnColumnIfExpr() { assertEquals(2, r.vector[1]); assertEquals(-3, r.vector[2]); assertEquals(-4, r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); // verify when first argument (boolean flags) is repeating @@ -230,7 +229,6 @@ public void testLongColumnColumnIfExpr() { assertEquals(2, r.vector[1]); assertEquals(3, r.vector[2]); assertEquals(-4, r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); // test when second argument has nulls @@ -308,7 +306,6 @@ public void testDoubleColumnColumnIfExpr() { 
assertEquals(true, 2d == r.vector[1]); assertEquals(true, -3d == r.vector[2]); assertEquals(true, -4d == r.vector[3]); - assertEquals(true, r.noNulls); assertEquals(false, r.isRepeating); } @@ -480,7 +477,6 @@ public void testIfExprStringColumnStringScalar() { assertTrue(getString(r, 1).equals("scalar")); assertTrue(getString(r, 2).equals("arg2_2")); assertTrue(getString(r, 3).equals("arg2_3")); - assertTrue(r.noNulls); // test for null input strings batch = getBatch1Long3BytesVectors(); @@ -504,7 +500,6 @@ public void testIfExprStringScalarStringColumn() { assertTrue(getString(r, 1).equals("arg3_1")); assertTrue(getString(r, 2).equals("scalar")); assertTrue(getString(r, 3).equals("scalar")); - assertTrue(r.noNulls); // test for null input strings batch = getBatch1Long3BytesVectors(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java index bd5a6b7..a60b9e4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java @@ -67,7 +67,6 @@ public void testLongColOrLongCol() { expr.evaluate(batch); // spot check - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); Assert.assertEquals(1, outCol.vector[1]); Assert.assertEquals(1, outCol.vector[2]); @@ -125,7 +124,6 @@ public void testLongColAndLongCol() { expr.evaluate(batch); // spot check - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(0, outCol.vector[0]); Assert.assertEquals(0, outCol.vector[1]); Assert.assertEquals(0, outCol.vector[2]); @@ -207,7 +205,6 @@ public void testBooleanNot() { batch.cols[0].noNulls = true; expr.evaluate(batch); Assert.assertFalse(outCol.isRepeating); - Assert.assertTrue(outCol.noNulls); Assert.assertEquals(1, outCol.vector[0]); Assert.assertEquals(0, 
outCol.vector[2]); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index ca3c259..202f18c 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -3757,7 +3757,6 @@ public void testStringColCompareStringColProjection() { expr.evaluate(batch); Assert.assertEquals(4, batch.size); outVector = ((LongColumnVector) batch.cols[3]).vector; - Assert.assertFalse(batch.cols[3].noNulls); Assert.assertFalse(batch.cols[3].isNull[0]); Assert.assertEquals(1, outVector[0]); Assert.assertFalse(batch.cols[3].isNull[1]); @@ -3821,7 +3820,6 @@ public void testStringColCompareStringColProjection() { expr.evaluate(batch); outVector = ((LongColumnVector) batch.cols[3]).vector; Assert.assertEquals(4, batch.size); - Assert.assertFalse(batch.cols[3].noNulls); Assert.assertFalse(batch.cols[3].isNull[0]); Assert.assertEquals(1, outVector[0]); Assert.assertFalse(batch.cols[3].isNull[1]); @@ -4064,7 +4062,6 @@ public void testColLower() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatchMixedCase(); @@ -4124,7 +4121,6 @@ public void testStringLength() { expr.evaluate(batch); outCol = (LongColumnVector) batch.cols[1]; Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); Assert.assertEquals(7, outCol.vector[0]); // length of "mixedUp" // no nulls, is repeating @@ -4486,7 +4482,6 @@ public void testColConcatStringScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4549,7 +4544,6 @@ 
public void testColConcatCharScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4612,7 +4606,6 @@ public void testColConcatVarCharScalar() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4675,7 +4668,6 @@ public void testStringScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4738,7 +4730,6 @@ public void testCharScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4801,7 +4792,6 @@ public void testVarCharScalarConcatCol() { outCol.start[0], outCol.length[0]); Assert.assertEquals(0, cmp); Assert.assertTrue(outCol.isRepeating); - Assert.assertFalse(outCol.noNulls); // no nulls, is repeating batch = makeStringBatch(); @@ -4923,7 +4913,6 @@ public void testColConcatCol() { batch.cols[0].noNulls = true; expr.evaluate(batch); Assert.assertEquals(false, outCol.isRepeating); - Assert.assertEquals(true, outCol.noNulls); cmp = StringExpr.compare(red, 0, red.length, outCol.vector[2], outCol.start[2], outCol.length[2]); Assert.assertEquals(0, cmp); @@ -5015,7 +5004,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5043,7 +5031,6 @@ public void testSubstrStart() throws UnsupportedEncodingException 
{ expr = new StringSubstrColStart(0, 1, 1); expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, @@ -5128,7 +5115,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 3nd char starts from index 3 and total length should be 7 bytes as max is 10 @@ -5153,7 +5139,6 @@ public void testSubstrStart() throws UnsupportedEncodingException { expr = new StringSubstrColStart(0, 2, 1); expr.evaluate(batch); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // the result is the last 1 character, which occupies 4 bytes @@ -5190,7 +5175,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); byte[] expected = "string".getBytes("UTF-8"); Assert.assertEquals(0, @@ -5218,7 +5202,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr = new StringSubstrColStartLen(0, -6, 6, 1); expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(3, batch.size); @@ -5250,7 +5233,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5280,7 +5262,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = 
(BytesColumnVector) batch.cols[1]; expr.evaluate(batch); Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5310,7 +5291,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(3, batch.size); - Assert.assertTrue(outCol.noNulls); Assert.assertFalse(outCol.isRepeating); Assert.assertEquals(0, StringExpr.compare( @@ -5391,7 +5371,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { expr.evaluate(batch); Assert.assertEquals(1, batch.size); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 3rd char starts at index 3, and with length 2 it is covering the rest of the array. @@ -5415,7 +5394,6 @@ public void testSubstrStartLen() throws UnsupportedEncodingException { outCol = (BytesColumnVector) batch.cols[1]; Assert.assertEquals(1, batch.size); Assert.assertFalse(outV.isRepeating); - Assert.assertTrue(outV.noNulls); Assert.assertEquals(0, StringExpr.compare( // 2nd substring index refers to the 6th index (last char in the array) diff --git ql/src/test/queries/clientpositive/vector_date_1.q ql/src/test/queries/clientpositive/vector_date_1.q index 0055973..bb515b1 100644 --- ql/src/test/queries/clientpositive/vector_date_1.q +++ ql/src/test/queries/clientpositive/vector_date_1.q @@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +set hive.cli.print.header=true; drop table if exists vector_date_1; create table vector_date_1 (dt1 date, dt2 date) stored as orc; @@ -13,8 +14,10 @@ insert into table vector_date_1 insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1; +select * from vector_date_1 order by dt1, dt2; + -- 
column-to-column comparison in select clause -explain +explain vectorization detail select dt1, dt2, -- should be all true @@ -41,7 +44,7 @@ select dt2 > dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, dt2, -- should be all false @@ -69,7 +72,7 @@ select from vector_date_1 order by dt1; -- column-to-literal/literal-to-column comparison in select clause -explain +explain vectorization detail select dt1, -- should be all true @@ -96,7 +99,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1; -explain +explain vectorization detail select dt1, -- should all be false @@ -126,7 +129,7 @@ from vector_date_1 order by dt1; -- column-to-column comparisons in predicate -- all rows with non-null dt1 should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -153,7 +156,7 @@ order by dt1; -- column-to-literal/literal-to-column comparison in predicate -- only a single row should be returned -explain +explain vectorization detail select dt1, dt2 from vector_date_1 @@ -182,7 +185,7 @@ where and date '1970-01-01' <= dt1 order by dt1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01'); SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01'); diff --git ql/src/test/queries/clientpositive/vectorization_nested_udf.q ql/src/test/queries/clientpositive/vectorization_nested_udf.q index da8f99c..25a25df 100644 --- ql/src/test/queries/clientpositive/vectorization_nested_udf.q +++ ql/src/test/queries/clientpositive/vectorization_nested_udf.q @@ -1,5 +1,8 @@ +set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc; SELECT SUM(abs(ctinyint)) from alltypesorc; diff --git ql/src/test/queries/clientpositive/vectorized_case.q 
ql/src/test/queries/clientpositive/vectorized_case.q index 99d7cfc..8aad2b5 100644 --- ql/src/test/queries/clientpositive/vectorized_case.q +++ ql/src/test/queries/clientpositive/vectorized_case.q @@ -1,8 +1,8 @@ set hive.explain.user=false; set hive.fetch.task.conversion=none; -set hive.vectorized.execution.enabled = true -; -explain vectorization expression +set hive.vectorized.execution.enabled = true; + +explain vectorization detail select csmallint, case @@ -37,7 +37,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain vectorization expression +explain vectorization detail select csmallint, case @@ -55,7 +55,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 ; -explain vectorization expression +explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -64,7 +64,7 @@ select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc; -explain vectorization expression +explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -79,19 +79,19 @@ CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC; INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0); --for length=3 -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr2 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1; --for length=2 and the expr3 is null -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 
2.0 END FROM test_1; SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1; @@ -102,19 +102,105 @@ CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC; INSERT INTO test_2 VALUES (3,1),(2,2),(1,3); --for length=3 -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2; ---for length=2 and the expression2 is null -EXPLAIN VECTORIZATION EXPRESSION +--for length=2 and the expression2 is null +EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2; ---for length=2 and the expression3 is null -EXPLAIN VECTORIZATION EXPRESSION +--for length=2 and the expression3 is null +EXPLAIN VECTORIZATION DETAIL +SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; + SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; -SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2; \ No newline at end of file + +select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as 
ceven +from alltypesorc +where cint is null) a; + + +select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a; + +select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a; + + diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 1e3d2b3..bacd667 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -22,6 +22,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 EXPRESSION [] POSTHOOK: Lineage: vector_date_1.dt2 EXPRESSION [] +_col0 _col1 PREHOOK: query: insert into table vector_date_1 select date '1999-12-31', date '2000-01-01' from src limit 1 PREHOOK: type: QUERY @@ -34,6 +35,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] +_c0 _c1 PREHOOK: query: insert into table vector_date_1 select date '2001-01-01', date '2001-06-01' from src limit 1 PREHOOK: type: QUERY @@ -46,7 
+48,20 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@vector_date_1 POSTHOOK: Lineage: vector_date_1.dt1 SIMPLE [] POSTHOOK: Lineage: vector_date_1.dt2 SIMPLE [] -PREHOOK: query: explain +_c0 _c1 +PREHOOK: query: select * from vector_date_1 order by dt1, dt2 +PREHOOK: type: QUERY +PREHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from vector_date_1 order by dt1, dt2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vector_date_1 +#### A masked pattern was here #### +vector_date_1.dt1 vector_date_1.dt2 +NULL NULL +1999-12-31 2000-01-01 +2001-01-01 2001-06-01 +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -60,7 +75,7 @@ select dt2 > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all true @@ -74,6 +89,11 @@ select dt2 > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -91,26 +111,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 = dt1) (type: boolean), (dt1 <> dt2) (type: boolean), (dt1 <= dt1) (type: boolean), (dt1 <= dt2) (type: boolean), (dt1 < dt2) (type: boolean), (dt2 >= dt2) (type: boolean), (dt2 >= dt1) (type: boolean), (dt2 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: 
LongColEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColNotEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessEqualLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColLessEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColLessLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterEqualLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColGreaterEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColGreaterLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, 
llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean, VALUE._col8:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -153,10 +222,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 true true true true true true true true 2001-01-01 2001-06-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false 
@@ -170,7 +240,7 @@ select dt2 < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2, -- should be all false @@ -184,6 +254,11 @@ select dt2 < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -201,26 +276,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), dt2 (type: date), (dt1 <> dt1) (type: boolean), (dt1 = dt2) (type: boolean), (dt1 < dt1) (type: boolean), (dt1 >= dt2) (type: boolean), (dt1 > dt2) (type: boolean), (dt2 > dt2) (type: boolean), (dt2 <= dt1) (type: boolean), (dt2 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: LongColNotEqualLongColumn(col 0:date, col 0:date) -> 3:boolean, LongColEqualLongColumn(col 0:date, col 1:date) -> 4:boolean, LongColLessLongColumn(col 0:date, col 0:date) -> 5:boolean, LongColGreaterEqualLongColumn(col 0:date, col 1:date) -> 6:boolean, LongColGreaterLongColumn(col 0:date, col 1:date) -> 7:boolean, LongColGreaterLongColumn(col 1:date, col 1:date) -> 8:boolean, LongColLessEqualLongColumn(col 1:date, col 0:date) -> 9:boolean, LongColLessLongColumn(col 1:date, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean, VALUE._col8:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 
(type: date), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -263,10 +387,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 _c2 _c3 _c4 _c5 _c6 _c7 _c8 _c9 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 2000-01-01 false false false false false false false false 2001-01-01 2001-06-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -280,7 +405,7 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should be all true @@ -294,6 +419,11 @@ select date '1970-01-01' < dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -311,26 +441,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: 
true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 <> 1970-01-01) (type: boolean), (dt1 >= 1970-01-01) (type: boolean), (dt1 > 1970-01-01) (type: boolean), (dt1 <= 2100-01-01) (type: boolean), (dt1 < 2100-01-01) (type: boolean), (1970-01-01 <> dt1) (type: boolean), (1970-01-01 <= dt1) (type: boolean), (1970-01-01 < dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColNotEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, DateColGreaterEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColGreaterDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColLessEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColLessDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarNotEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarLessEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarLessDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 
(type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:boolean, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: 
Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -373,10 +552,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 true true true true true true true true 2001-01-01 true true true true true true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -390,7 +570,7 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, -- should all be false @@ -404,6 +584,11 @@ select date '1970-01-01' > dt1 from vector_date_1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -421,26 +606,75 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Select Operator expressions: dt1 (type: date), (dt1 = 1970-01-01) (type: boolean), (dt1 <= 1970-01-01) (type: boolean), (dt1 < 1970-01-01) (type: boolean), (dt1 >= 2100-01-01) (type: boolean), (dt1 > 2100-01-01) (type: boolean), (1970-01-01 = dt1) (type: boolean), (1970-01-01 >= dt1) (type: boolean), (1970-01-01 > dt1) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3, 4, 5, 6, 7, 8, 9, 10] + selectExpressions: DateColEqualDateScalar(col 0:date, date 1970-01-01) -> 3:boolean, 
DateColLessEqualDateScalar(col 0:date, date 1970-01-01) -> 4:boolean, DateColLessDateScalar(col 0:date, date 1970-01-01) -> 5:boolean, DateColGreaterEqualDateScalar(col 0:date, date 2100-01-01) -> 6:boolean, DateColGreaterDateScalar(col 0:date, date 2100-01-01) -> 7:boolean, DateScalarEqualDateColumn(date 1970-01-01, col 0:date) -> 8:boolean, DateScalarGreaterEqualDateColumn(date 1970-01-01, col 0:date) -> 9:boolean, DateScalarGreaterDateColumn(date 1970-01-01, col 0:date) -> 10:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:boolean, VALUE._col1:boolean, VALUE._col2:boolean, VALUE._col3:boolean, VALUE._col4:boolean, VALUE._col5:boolean, VALUE._col6:boolean, VALUE._col7:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -483,10 +717,11 @@ from vector_date_1 order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 _c1 _c2 _c3 _c4 _c5 _c6 _c7 _c8 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1999-12-31 false false false false false false false false 2001-01-01 false false false false false false false false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -499,7 +734,7 @@ where and dt2 >= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: 
explain vectorization detail select dt1, dt2 from vector_date_1 @@ -512,6 +747,11 @@ where and dt2 >= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -529,29 +769,81 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongColumn(col 0:date, col 0:date), FilterLongColNotEqualLongColumn(col 0:date, col 1:date), FilterLongColLessLongColumn(col 0:date, col 1:date), FilterLongColLessEqualLongColumn(col 0:date, col 1:date), FilterLongColGreaterLongColumn(col 1:date, col 0:date), FilterLongColGreaterEqualLongColumn(col 1:date, col 0:date)) predicate: ((dt1 < dt2) and (dt1 <= dt2) and (dt1 <> dt2) and (dt1 = dt1) and (dt2 > dt1) and (dt2 >= dt1)) (type: boolean) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: dt1 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: date) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true + valueColumnNums: [1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -592,9 +884,10 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 1999-12-31 2000-01-01 2001-01-01 2001-06-01 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail 
select dt1, dt2 from vector_date_1 @@ -609,7 +902,7 @@ where and date '1970-01-01' <= dt1 order by dt1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select dt1, dt2 from vector_date_1 @@ -624,6 +917,11 @@ where and date '1970-01-01' <= dt1 order by dt1 POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -638,15 +936,30 @@ STAGE PLANS: TableScan alias: vector_date_1 Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11323, col 0:date), FilterDateColNotEqualDateScalar(col 0:date, val 0), FilterDateScalarNotEqualDateColumn(val 0, col 0:date)) predicate: ((1970-01-01 <> dt1) and (2001-01-01 = dt1) and (dt1 <> 1970-01-01)) (type: boolean) Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 2001-01-01 (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1] + selectExpressions: ConstantVectorExpression(val 11323) -> 3:date Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -654,6 +967,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: 
vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Stage: Stage-0 Fetch Operator @@ -693,13 +1021,15 @@ order by dt1 POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 dt2 2001-01-01 2001-06-01 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', date '2001-01-01') POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -720,6 +1050,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:dt1:date, 1:dt2:date, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -756,6 +1087,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: dt1:date, dt2:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -771,6 +1108,7 @@ POSTHOOK: query: SELECT dt1 FROM vector_date_1 WHERE dt1 IN (date '1970-01-01', POSTHOOK: type: QUERY POSTHOOK: Input: default@vector_date_1 #### A masked pattern was here #### +dt1 2001-01-01 
PREHOOK: query: drop table vector_date_1 PREHOOK: type: DROPTABLE diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out index bca2d2a..f321770 100644 --- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out @@ -1,3 +1,126 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: 
VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group 
By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out index e46c7f4..130e137 100644 --- ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out @@ -70,15 +70,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_part - Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: (cdouble + 2.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 200 Data size: 1600 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -103,13 +103,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index f56d9ce..ed17e5c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -54,6 +54,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -91,6 +92,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -140,7 +147,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -158,7 +165,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -196,6 +203,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -233,6 +241,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, 
ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -240,13 +254,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -275,6 +289,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -300,8 +315,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap 
@@ -315,14 +332,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -367,14 +397,14 @@ from alltypesorc POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -5110 4607 -PREHOOK: query: explain vectorization expression +4086 3583 +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -403,6 +433,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -428,8 +459,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -443,14 +476,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -514,10 +560,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table 
POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -540,6 +586,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -570,6 +617,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -588,10 +641,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -614,6 +667,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE 
WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -644,6 +698,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -662,10 +722,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -688,6 +748,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -718,6 +779,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -754,10 +821,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: 
EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -780,6 +847,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -810,6 +878,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -828,10 +902,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -854,6 +928,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -884,6 +959,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ 
-902,10 +983,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -928,6 +1009,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -958,6 +1040,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -976,3 +1064,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case 
when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: 
select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 
cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out index 
bca2d2a..96ad3ad 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -1,3 +1,124 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce 
Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index c1dd74c..c084568 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -54,6 +54,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -90,6 +91,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + 
includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -139,7 +146,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -157,7 +164,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -195,6 +202,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -231,6 +239,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -238,13 +252,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization 
detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -272,6 +286,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -297,8 +312,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -311,14 +328,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + 
partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -370,7 +400,7 @@ select sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -398,6 +428,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -423,8 +454,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + valueColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -437,14 +470,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -508,10 +554,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -534,6 +580,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: 
[0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -563,6 +610,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -581,10 +634,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -607,6 +660,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -636,6 +690,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -654,10 +714,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN 
VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -680,6 +740,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -709,6 +770,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -745,10 +812,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -771,6 +838,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -800,6 +868,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + 
dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -818,10 +892,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -844,6 +918,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -873,6 +948,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -891,10 +972,10 @@ POSTHOOK: Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -917,6 +998,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 
1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -946,6 +1028,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -964,3 +1052,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select 
count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: 
query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git ql/src/test/results/clientpositive/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/vectorization_nested_udf.q.out index bca2d2a..090c6c6 100644 --- ql/src/test/results/clientpositive/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/vectorization_nested_udf.q.out @@ -1,3 +1,97 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(abs(ctinyint)) from alltypesorc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 
Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] + Select Operator + expressions: abs(ctinyint) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [13] + selectExpressions: FuncAbsLongToLong(col 0:tinyint) -> 13:int + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 13:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 
+ includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(abs(ctinyint)) from alltypesorc PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 50e9b0e..9bfb200 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -16,7 +16,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan 
Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -87,6 +88,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string] Stage: Stage-0 Fetch Operator @@ -136,7 +143,7 @@ POSTHOOK: Input: default@alltypesorc 10583 c c 418 a a 12205 b b -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select csmallint, case @@ -154,7 +161,7 @@ where csmallint = 418 or csmallint = 12205 or csmallint = 10583 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select csmallint, case @@ -189,6 +196,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -225,6 +233,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1] + 
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string] Stage: Stage-0 Fetch Operator @@ -232,13 +246,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then 1 else 0 end) as ceven, sum(case when cint % 2 = 1 then 1 else 0 end) as codd @@ -261,6 +275,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -301,6 +316,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] 
Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -340,13 +361,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### 5110 4607 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select sum(case when cint % 2 = 0 then cint else 0 end) as ceven, sum(case when cint % 2 = 1 then cint else 0 end) as codd @@ -369,6 +390,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Select Operator expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int) outputColumnNames: _col0, _col1 @@ -409,6 +431,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -466,10 +494,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: 
Output: default@test_1 POSTHOOK: Lineage: test_1.attr SCRIPT [] POSTHOOK: Lineage: test_1.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -489,6 +517,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -518,6 +547,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -536,10 +571,10 @@ POSTHOOK: Input: default@test_1 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -559,6 +594,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 
1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -588,6 +624,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -606,10 +648,10 @@ POSTHOOK: Input: default@test_1 3 4 1 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -629,6 +671,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0)) outputColumnNames: _col0 @@ -658,6 +701,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:decimal(10,0), attr:decimal(10,0) + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)] Stage: Stage-0 Fetch Operator @@ -694,10 +743,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_2 POSTHOOK: Lineage: test_2.attr SCRIPT [] POSTHOOK: Lineage: test_2.member SCRIPT [] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION 
EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -717,6 +766,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -746,6 +796,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -764,10 +820,10 @@ POSTHOOK: Input: default@test_2 3 4 4 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -787,6 +843,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint) outputColumnNames: _col0 @@ -816,6 +873,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -834,10 +897,10 @@ POSTHOOK: 
Input: default@test_2 3 4 NULL -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -857,6 +920,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct] Select Operator expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint) outputColumnNames: _col0 @@ -886,6 +950,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: member:bigint, attr:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator @@ -904,3 +974,227 @@ POSTHOOK: Input: default@test_2 NULL NULL 4 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then 1 else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) 
as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case 
when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then 1 else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 +PREHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*), sum(a.ceven) +from ( +select + case when cint % 2 = 0 then cint else 0 end as ceven +from alltypesorc) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +12288 248718130534 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +8202 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) 
a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +5087 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +4086 +PREHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) +from ( +select + (case when cint % 2 = 0 then cint else 0 end) as ceven +from alltypesorc +where cint is null) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3115 diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index b743e64..11a9d17 100644 
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + /** * This class supports string and binary data by value reference -- i.e. each field is @@ -309,46 +311,90 @@ public void increaseBufferSpace(int nextElemLength) { /** Copy the current object contents into the output. Only copy selected entries, * as indicated by selectedInUse and the sel array. */ + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + BytesColumnVector output = (BytesColumnVector) outputColVector; + + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.setVal(0, vector[0], start[0], length[0]); - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.setVal(0, vector[0], start[0], length[0]); + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.setVal(i, vector[i], start[i], length[i]); - } - } - else { - for (int i = 0; i < size; i++) { - output.setVal(i, vector[i], start[i], length[i]); + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.setVal(i, vector[i], start[i], length[i]); + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.setVal(i, vector[i], start[i], length[i]); + } + } else { + for(int i = 0; i < size; ++i) { + output.setVal(i, vector[i], start[i], length[i]); + } + } } - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; - output.isNull[i] = isNull[i]; + if (!isNull[i]) { + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.isNull[i] = false; + output.setVal(i, vector[i], start[i], length[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } } - } - else { - System.arraycopy(isNull, 0, output.isNull, 0, size); } } } @@ -390,9 +436,9 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { // Fill the all the vector entries with provided value public void fill(byte[] value) { - noNulls = true; isRepeating = true; - setRef(0, value, 0, value.length); + isNull[0] = false; + setVal(0, value, 0, value.length); } // Fill the column vector with nulls diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index bce0bd7..59bfcce 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -181,6 +181,13 @@ protected 
void flattenPush() { public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector); + /* + * Copy the current object contents into the output. Only copy selected entries + * as indicated by selectedInUse and the sel array. + */ + public abstract void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector); + /** * Initialize the column vector. This method can be overridden by specific column vector types. * Use this method only if the individual type of the column vector is not known, otherwise its diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index e41e19f..4b41e99 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -51,18 +53,17 @@ public DecimalColumnVector(int size, int precision, int scale) { // Fill the all the vector entries with provided value public void fill(HiveDecimal value) { - noNulls = true; isRepeating = true; + isNull[0] = false; if (vector[0] == null) { vector[0] = new HiveDecimalWritable(value); - } else { - vector[0].set(value); } + set(0, value); } @Override public void flatten(boolean selectedInUse, int[] sel, int size) { - // TODO Auto-generated method stub + throw new RuntimeException("Not implemented"); } @Override @@ -149,4 +150,95 @@ public void shallowCopyTo(ColumnVector otherCv) { other.precision = precision; other.vector = vector; } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. 
+ @Override + public void copySelected( + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DecimalColumnVector output = (DecimalColumnVector) outputColVector; + + // We do not need to do a column reset since we are carefully changing the output. + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.set(0, vector[0]); + } else { + output.isNull[0] = true; + output.noNulls = false; + output.vector[0].setFromLong(0); + } + output.isRepeating = true; + return; + } + + // Handle normal case + + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.set(i, vector[i]); + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.set(i, vector[i]); + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.set(i, vector[i]); + } + } else { + for(int i = 0; i < size; ++i) { + output.set(i, vector[i]); + } + } + } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... 
+ + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + if (!isNull[i]) { + output.isNull[i] = false; + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } else { + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + output.isNull[i] = false; + output.set(i, vector[i]); + } else { + output.isNull[i] = true; + output.noNulls = false; + } + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index e04af01..e957a57 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -54,52 +54,94 @@ public DoubleColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + DoubleColumnVector output = (DoubleColumnVector) outputColVector; - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.vector[0] = vector[0]; + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; + if (noNulls) { + + // Carefully handle NULLs... 
+ if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } else { + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } } - } - else { - System.arraycopy(vector, 0, output.vector, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + for (int i = 0; i < size; i++) { + output.vector[i] = vector[i]; + } } } } // Fill the column vector with the provided value public void fill(double value) { - noNulls = true; isRepeating = true; + isNull[0] = false; vector[0] = value; } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java index f813b1b..d2da8dc 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java @@ -237,6 +237,7 @@ public void flatten(boolean selectedInUse, int[] sel, int 
size) { public void set(int elementNum, HiveIntervalDayTime intervalDayTime) { this.totalSeconds[elementNum] = intervalDayTime.getTotalSeconds(); this.nanos[elementNum] = intervalDayTime.getNanos(); + isNull[elementNum] = false; } /** @@ -246,6 +247,7 @@ public void set(int elementNum, HiveIntervalDayTime intervalDayTime) { public void setFromScratchIntervalDayTime(int elementNum) { this.totalSeconds[elementNum] = scratchIntervalDayTime.getTotalSeconds(); this.nanos[elementNum] = scratchIntervalDayTime.getNanos(); + isNull[elementNum] = false; } /** @@ -260,47 +262,87 @@ public void setNullValue(int elementNum) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + IntervalDayTimeColumnVector output = (IntervalDayTimeColumnVector) outputColVector; - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.totalSeconds[0] = totalSeconds[0]; - output.nanos[0] = nanos[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.totalSeconds[0] = totalSeconds[0]; + output.nanos[0] = nanos[0]; + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.totalSeconds[i] = totalSeconds[i]; - output.nanos[i] = nanos[i]; + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. 
+ * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; + } + } else { + System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); + } } - } - else { - System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); - System.arraycopy(nanos, 0, output.nanos, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.totalSeconds[i] = totalSeconds[i]; + output.nanos[i] = nanos[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); } } } @@ -310,8 +352,8 @@ public void copySelected( * @param intervalDayTime */ public void fill(HiveIntervalDayTime intervalDayTime) { - noNulls = true; isRepeating = true; + isNull[0] = false; totalSeconds[0] = intervalDayTime.getTotalSeconds(); nanos[0] = intervalDayTime.getNanos(); } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java index 7ecb1e0..b36baac 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java @@ -116,4 +116,10 @@ public void unFlatten() { } } + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } + } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index 49e9184..753c43c 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -54,44 +54,80 @@ public LongColumnVector(int len) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. 
+ @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + LongColumnVector output = (LongColumnVector) outputColVector; - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.vector[0] = vector[0]; + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + System.arraycopy(vector, 0, output.vector, 0, size); + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } else { + System.arraycopy(vector, 0, output.vector, 0, size); + } } - } - else { - System.arraycopy(vector, 0, output.vector, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(vector, 0, output.vector, 0, size); } } } @@ -101,51 +137,82 @@ public void copySelected( public void copySelected( boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.vector[0] = vector[0]; // automatic conversion to double is done here - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.vector[0] = vector[0]; // automatic conversion to double is done here + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.vector[i] = vector[i]; - } - } - else { - for(int i = 0; i < size; ++i) { - output.vector[i] = vector[i]; + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. 
+ */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.vector[i] = vector[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + System.arraycopy(vector, 0, output.vector, 0, size); + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } else { + System.arraycopy(vector, 0, output.vector, 0, size); + } } - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. + */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.vector[i] = vector[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(vector, 0, output.vector, 0, size); } } } // Fill the column vector with the provided value public void fill(long value) { - noNulls = true; isRepeating = true; + isNull[0] = false; vector[0] = value; } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java index 078c9c1..cbb8ee2 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java @@ -128,4 +128,10 @@ public void unFlatten() { values.unFlatten(); } } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java index b65c802..54989eb 100644 --- 
storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java @@ -134,4 +134,10 @@ public void setRepeating(boolean isRepeating) { public void shallowCopyTo(ColumnVector otherCv) { throw new UnsupportedOperationException(); // Implement if needed. } + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java index 0e7f86f..90f59ad 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -311,47 +311,87 @@ public void setNullValue(int elementNum) { // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. + @Override public void copySelected( - boolean selectedInUse, int[] sel, int size, TimestampColumnVector output) { + boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) { + + TimestampColumnVector output = (TimestampColumnVector) outputColVector; - // Output has nulls if and only if input has nulls. - output.noNulls = noNulls; + // We do not need to do a column reset since we are carefully changing the output. 
output.isRepeating = false; // Handle repeating case if (isRepeating) { - output.time[0] = time[0]; - output.nanos[0] = nanos[0]; - output.isNull[0] = isNull[0]; + if (noNulls || !isNull[0]) { + output.isNull[0] = false; + output.time[0] = time[0]; + output.nanos[0] = nanos[0]; + } else { + output.isNull[0] = true; + output.noNulls = false; + } output.isRepeating = true; return; } // Handle normal case - // Copy data values over - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - output.time[i] = time[i]; - output.nanos[i] = nanos[i]; + if (noNulls) { + + // Carefully handle NULLs... + if (!output.noNulls) { + + /* + * Make sure our output results have their isNull entry initialized to false. + * NOTE: We leave outputColVector.noNulls flag alone since we don't clear all + * the isNull entries. + */ + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = false; + output.time[i] = time[i]; + output.nanos[i] = nanos[i]; + } + } else { + Arrays.fill(output.isNull, 0, size, false); + System.arraycopy(time, 0, output.time, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); + } + } else { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.time[i] = time[i]; + output.nanos[i] = nanos[i]; + } + } else { + System.arraycopy(time, 0, output.time, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); + } } - } - else { - System.arraycopy(time, 0, output.time, 0, size); - System.arraycopy(nanos, 0, output.nanos, 0, size); - } + } else /* there are nulls in our column */ { + + // Carefully handle NULLs... + + /* + * For better performance on LONG/DOUBLE we don't want the conditional + * statements inside the for loop. 
+ */ + output.noNulls = false; - // Copy nulls over if needed - if (!noNulls) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; output.isNull[i] = isNull[i]; + output.time[i] = time[i]; + output.nanos[i] = nanos[i]; } - } - else { + } else { System.arraycopy(isNull, 0, output.isNull, 0, size); + System.arraycopy(time, 0, output.time, 0, size); + System.arraycopy(nanos, 0, output.nanos, 0, size); } } } @@ -361,8 +401,8 @@ public void copySelected( * @param timestamp */ public void fill(Timestamp timestamp) { - noNulls = true; isRepeating = true; + isNull[0] = false; time[0] = timestamp.getTime(); nanos[0] = timestamp.getNanos(); } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java index 448461b..9ace7f3 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java @@ -142,4 +142,10 @@ public void setRepeating(boolean isRepeating) { public void shallowCopyTo(ColumnVector otherCv) { throw new UnsupportedOperationException(); // Implement if needed. 
} + + @Override + public void copySelected(boolean selectedInUse, int[] sel, int size, + ColumnVector outputColVector) { + throw new RuntimeException("Not supported"); + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 42c7e8f..ea13c24 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -136,6 +136,50 @@ private static String toUTF8(Object o) { return o.toString(); } + public String stringifyColumn(int columnNum) { + if (size == 0) { + return ""; + } + StringBuilder b = new StringBuilder(); + b.append("columnNum "); + b.append(columnNum); + b.append(", size "); + b.append(size); + b.append(", selectedInUse "); + b.append(selectedInUse); + ColumnVector colVector = cols[columnNum]; + b.append(", noNulls "); + b.append(colVector.noNulls); + b.append(", isRepeating "); + b.append(colVector.isRepeating); + b.append('\n'); + + final boolean noNulls = colVector.noNulls; + final boolean[] isNull = colVector.isNull; + if (colVector.isRepeating) { + final boolean hasRepeatedValue = (noNulls || !isNull[0]); + for (int i = 0; i < size; i++) { + if (hasRepeatedValue) { + colVector.stringifyValue(b, 0); + } else { + b.append("NULL"); + } + b.append('\n'); + } + } else { + for (int i = 0; i < size; i++) { + final int batchIndex = (selectedInUse ? 
selected[i] : i); + if (noNulls || !isNull[batchIndex]) { + colVector.stringifyValue(b, batchIndex); + } else { + b.append("NULL"); + } + b.append('\n'); + } + } + return b.toString(); + } + @Override public String toString() { if (size == 0) { diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java index 657ea34..669dc7f 100644 --- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java +++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java @@ -3008,6 +3008,11 @@ private void generateDTIScalarCompareColumn(String[] tdesc) throws Exception { templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType); + + String vectorExpressionParametersBody = getDTIScalarColumnDisplayBody(operandType); + templateString = templateString.replaceAll( + "", vectorExpressionParametersBody); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); } @@ -3028,6 +3033,30 @@ private void generateFilterDTIScalarCompareColumn(String[] tdesc) throws Excepti className, templateString); } + private String getDTIScalarColumnDisplayBody(String type) { + if (type.equals("date")) { + return + "Date dt = new Date(0);" + + " dt.setTime(DateWritable.daysToMillis((int) value));\n" + + " return \"date \" + dt.toString() + \", \" + getColumnParamString(0, colNum);"; + } else { + return + " return super.vectorExpressionParameters();"; + } + } + + private String getDTIColumnScalarDisplayBody(String type) { + if (type.equals("date")) { + return + "Date dt = new Date(0);" + + " dt.setTime(DateWritable.daysToMillis((int) value));\n" + + " return getColumnParamString(0, colNum) + \", date \" + dt.toString();"; + } else { + return + " return super.vectorExpressionParameters();"; + } + } + private void 
generateDTIColumnCompareScalar(String[] tdesc) throws Exception { String operatorName = tdesc[1]; String operandType = tdesc[2]; @@ -3040,6 +3069,11 @@ private void generateDTIColumnCompareScalar(String[] tdesc) throws Exception { templateString = templateString.replaceAll("", className); templateString = templateString.replaceAll("", baseClassName); templateString = templateString.replaceAll("", operandType); + + String vectorExpressionParametersBody = getDTIColumnScalarDisplayBody(operandType); + templateString = templateString.replaceAll( + "", vectorExpressionParametersBody); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); }