diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index 60f4667..d3bb28e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -35,6 +35,7 @@ public class DoubleColumnVector extends ColumnVector { public double[] vector; private final DoubleWritable writableObj = new DoubleWritable(); + public static final double NULL_VALUE = Double.NaN; /** * Use this constructor by default. All column vectors diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index 3a866ef..f65e8fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -35,6 +35,7 @@ public class LongColumnVector extends ColumnVector { public long[] vector; private final LongWritable writableObj = new LongWritable(); + public static final long NULL_VALUE = 1; /** * Use this constructor by default. All column vectors diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java new file mode 100644 index 0000000..e1867a2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; + + +/** + * Utility functions to handle null propagation. + */ +public class NullUtil { + + /* + * Set the data value for all NULL entries to the designated NULL_VALUE. 
+ */ + public static void setNullDataEntriesLong( + LongColumnVector v, boolean selectedInUse, int[] sel, int n) { + if (v.noNulls) { + return; + } else if (v.isRepeating && v.isNull[0]) { + v.vector[0] = LongColumnVector.NULL_VALUE; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if(v.isNull[i]) { + v.vector[i] = LongColumnVector.NULL_VALUE; + } + } + } else { + for (int i = 0; i != n; i++) { + if(v.isNull[i]) { + v.vector[i] = LongColumnVector.NULL_VALUE; + } + } + } + } + + // for use by Column-Scalar and Scalar-Column arithmetic for null propagation + public static void setNullOutputEntriesColScalar( + ColumnVector v, boolean selectedInUse, int[] sel, int n) { + if (v instanceof DoubleColumnVector) { + + // No need to set null data entries because the input NaN values + // will automatically propagate to the output. + return; + } + setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n); + } + + /* + * Set the data value for all NULL entries to NaN + */ + public static void setNullDataEntriesDouble( + DoubleColumnVector v, boolean selectedInUse, int[] sel, int n) { + if (v.noNulls) { + return; + } else if (v.isRepeating && v.isNull[0]) { + v.vector[0] = DoubleColumnVector.NULL_VALUE; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if(v.isNull[i]) { + v.vector[i] = DoubleColumnVector.NULL_VALUE; + } + } + } else { + for (int i = 0; i != n; i++) { + if(v.isNull[i]) { + v.vector[i] = DoubleColumnVector.NULL_VALUE; + } + } + } + } + + /* + * Propagate null values for a two-input operator. + */ + public static void propagateNullsColCol(ColumnVector inputColVector1, + ColumnVector inputColVector2, ColumnVector outputColVector, int[] sel, + int n, boolean selectedInUse) { + + outputColVector.noNulls = inputColVector1.noNulls && inputColVector2.noNulls; + + if (inputColVector1.noNulls && !inputColVector2.noNulls) { + if (inputColVector2.isRepeating) { + outputColVector.isNull[0] = inputColVector2.isNull[0]; + } else { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector2.isNull[i]; + } + } else { + System.arraycopy(inputColVector2.isNull, 0, outputColVector.isNull, 0, n); + } + } + } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating) { + outputColVector.isNull[0] = inputColVector1.isNull[0]; + } else { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector1.isNull[i]; + } + } else { + System.arraycopy(inputColVector1.isNull, 0, outputColVector.isNull, 0, n); + } + } + } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isNull[0] = inputColVector1.isNull[0] || inputColVector2.isNull[0]; + if (outputColVector.isNull[0]) { + outputColVector.isRepeating = true; + return; + } + } else if (inputColVector1.isRepeating && !inputColVector2.isRepeating) { + if (inputColVector1.isNull[0]) { + outputColVector.isNull[0] = true; + outputColVector.isRepeating = true; // because every value will be NULL + return; + } else { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector2.isNull[i]; + } + } else { + + // copy nulls from the non-repeating side + System.arraycopy(inputColVector2.isNull, 0, outputColVector.isNull, 0, n); + } + } + } else if (!inputColVector1.isRepeating && 
inputColVector2.isRepeating) { + if (inputColVector2.isNull[0]) { + outputColVector.isNull[0] = true; + outputColVector.isRepeating = true; // because every value will be NULL + return; + } else { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector1.isNull[i]; + } + } else { + + // copy nulls from the non-repeating side + System.arraycopy(inputColVector1.isNull, 0, outputColVector.isNull, 0, n); + } + } + } else { // neither side is repeating + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; + } + } + } + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java index c584fb8..a234fc2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. 
- */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] + vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] + vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java index 4c22f7b..853a358 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColAddDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] + value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] + value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java index d22d7cf..d67ebba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void 
evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] + vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] + vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java index d2c2229..9ebd33c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColAddLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColAddLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] + value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] + value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java index 0b3e1fe..9420d94 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void 
evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] / vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] / vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java index ab2713b..88fde1f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColDivideDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] / value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] / value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] / value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] / value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java index 167b997..47de451 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 
@@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] / vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] / vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java index 210d05c..ddb5471 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColDivideLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColDivideLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] / value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] / value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] / value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] / value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java index 0e0c9ce..02b1c4d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 
@@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] % vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] % vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java index 8e65620..97ca799 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColModuloDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] % value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] % value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java index be0bdb0..a38e5f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 
@@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] % vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] % vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java index 7dbd7ab..4ca1937 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColModuloLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColModuloLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] % value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] % value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java index aed0408..aff33a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ 
-55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] * vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] * vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java index 577e2b6..322f9ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColMultiplyDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] * value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] * value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java index 87f7cf5..c73eb4d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is 
empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] * vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] * vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
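
// A companion sketch, likewise not part of this patch, for the isRepeating
// handling shown above: when isRepeating is true, entry 0 of the vector stands
// for every row, so the value side of a column-column operator can only repeat
// when both inputs repeat, while a column-scalar operator simply inherits the
// flag from its single column input. The class name and values are illustrative.

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

class RepeatingFlagSketch {
  static void demo() {
    LongColumnVector a = new LongColumnVector();
    LongColumnVector b = new LongColumnVector();
    LongColumnVector out = new LongColumnVector();
    a.isRepeating = true;   a.vector[0] = 7;            // every row of a is 7
    b.isRepeating = false;  b.vector[0] = 3;            // b varies row by row
    out.isRepeating = a.isRepeating && b.isRepeating;   // false: output varies
    // only when both inputs repeat is out.vector[0] alone meaningful
  }
}
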
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java index 538e51f..5505ac3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColMultiplyLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColMultiplyLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] * value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] * value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java index 15abdbd..805e4a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is 
empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] - vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] - vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java index 4badb76..905a9c1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColSubtractDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] - value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] - value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java index f666e19..9dc06ab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; double[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is 
empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] - vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] - vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java index 6a68958..e53e7be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleColSubtractLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class DoubleColSubtractLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] - value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] - value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java index 3a14d22..9b7411d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = 
inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value + vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value + vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java index 5362afb..e9caf5f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarAddLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value + vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value + vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java index faad560..1564b46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value / vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value / vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java index 5fe7419..402c9ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarDivideLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value / vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value / vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java index aa40b0d..6e122c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value % vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value % vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java index 0add898..5fc4d07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarModuloLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value % vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value % vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java index ad9128c..0d7a82f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value * vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value * vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java index 8edceea..997e4ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarMultiplyLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value * vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value * vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java index c17e612..5a7434c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value - vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value - vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java index 4aae322..7867f51 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/DoubleScalarSubtractLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value - vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value - vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java index cda87de..6c41827 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. 
- */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] + vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] + vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java index e511b00..b0b4615 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColAddDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] + value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] + value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java index b3ed12a..8ac54f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - long[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch batch) { 
return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] + vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] + vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java index f7403cf..5b10f39 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColAddLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColAddLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] + value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] + value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] + value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java index 02b4120..799ea0f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void evaluate(VectorizedRowBatch 
batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] / vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] / vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
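+ * As an editorial illustration (a sketch, not part of the original patch),
+ * the double case relies on NaN propagation:
+ *
+ *   DoubleColumnVector out = ...; // operator output for this batch
+ *   out.noNulls = false;
+ *   out.isNull[1] = true;         // row 1 is null
+ *   NullUtil.setNullDataEntriesDouble(out, false, null, n);
+ *   // out.vector[1] is now NaN, the double null marker; any later
+ *   // arithmetic on this entry yields NaN again, so the marker survives
+ *   // subsequent double operations while isNull[1] still flags the row.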
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java index 26885c2..ba539ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColDivideDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColDivideDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] / value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] / value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] / value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] / value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleColumn.java index 568390c..f9cd330 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public 
void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] % vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] % vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleScalar.java index 8f984b2..879c5c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColModuloDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] % value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] % value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongColumn.java index 7f7902b..db152b6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - long[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void 
evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] % vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] % vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongScalar.java index d5b857b..a571280 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColModuloLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColModuloLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] % value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] % value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] % value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleColumn.java index 3153305..6fb734e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public void 
evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] * vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] * vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleScalar.java index fbe9f0d..1d47276 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColMultiplyDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] * value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] * value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongColumn.java index f137271..a436e9f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - long[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ 
public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] * vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] * vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongScalar.java index 7ddf4a5..ea5ef16 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColMultiplyLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColMultiplyLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] * value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] * value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] * value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleColumn.java index 0287e3c..517424a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; double[] vector2 = inputColVector2.vector; - double[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public 
void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] - vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] - vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
+ */ + NullUtil.setNullDataEntriesDouble(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleScalar.java index d92a626..10aeacc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractDoubleScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColSubtractDoubleScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] - value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] - value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongColumn.java index c806c67..f1dc4e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongColumn.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public void evaluate(VectorizedRowBatch batch) { int n = batch.size; long[] vector1 = inputColVector1.vector; long[] vector2 = inputColVector2.vector; - long[] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ 
public void evaluate(VectorizedRowBatch batch) { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. - */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] - vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public void evaluate(VectorizedRowBatch batch) { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] - vector2[i]; } @@ -164,6 +102,14 @@ public void evaluate(VectorizedRowBatch batch) { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
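+ * Editorial note (illustration, not part of the original patch): the
+ * centralized propagateNullsColCol call above preserves the semantics of
+ * the inlined code it replaces, namely
+ *
+ *   outputColVector.isNull[i] =
+ *       inputColVector1.isNull[i] || inputColVector2.isNull[i];
+ *
+ * so a row of col1 - col2 is null whenever either input row is null, and
+ * the data value at such rows is then normalized to 1 just below.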
+ */ + NullUtil.setNullDataEntriesLong(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongScalar.java index c106de0..84c0601 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongColSubtractLongScalar.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class LongColSubtractLongScalar extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] - value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; } @@ -74,10 +74,9 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = vector[i] - value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] - value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddDoubleColumn.java index a0fd2c7..9dafc7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; 
int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value + vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value + vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddLongColumn.java index b016d56..1e795a9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarAddLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value + vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value + vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarDivideDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarDivideDoubleColumn.java index 461c294..4147185 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarDivideDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarDivideDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value / vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value / vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloDoubleColumn.java index 1c06e79..4c25195 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value % vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value % vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloLongColumn.java index 66eadb0..7729d70 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarModuloLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value % vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value % vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyDoubleColumn.java index 90d626c..fd4a825 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value * vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
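
// Illustrative sketch, not part of the patch: why the trailing
// NullUtil.setNullOutputEntriesColScalar call has nothing to rewrite for the
// *DoubleColumn outputs in these generated classes. Under this patch's convention a
// null slot in a DoubleColumnVector holds NaN, and IEEE-754 arithmetic propagates NaN,
// so null output slots already carry the null marker after the main loop; only long
// outputs need the explicit fix-up.
double nullSlot = DoubleColumnVector.NULL_VALUE;   // NaN, per the new constant
double product = 23L * nullSlot;                   // scalar op null input slot
assert Double.isNaN(product);                      // already the agreed null marker
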
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value * vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyLongColumn.java index 681ded9..ca1e1ad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarMultiplyLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value * vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value * vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java index 6d7c003..71cd1dd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractDoubleColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; double[] vector = inputColVector.vector; double[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value - vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value - vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java index 0689b83..53b68f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/LongScalarSubtractLongColumn.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public void evaluate(VectorizedRowBatch batch) { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; long[] vector = inputColVector.vector; long[] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public void evaluate(VectorizedRowBatch batch) { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value - vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public void evaluate(VectorizedRowBatch batch) { outputVector[i] = value - vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public void evaluate(VectorizedRowBatch batch) { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java index 3ef2aa8..773f75b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java @@ -510,6 +510,7 @@ private void generateColumnBinaryOperatorColumn(String[] tdesc, String returnTyp templateString = templateString.replaceAll("", operandType1); templateString = templateString.replaceAll("", operandType2); templateString = templateString.replaceAll("", returnType); + templateString = templateString.replaceAll("", getCamelCaseType(returnType)); writeFile(outputFile, templateString); if(returnType==null){ @@ -649,7 +650,10 @@ static String readFile(String templateFile) throws IOException { return b.toString(); } - static String getCamelCaseType(String type) { + static String getCamelCaseType(String type) { + if (type == null) { + return null; + } if (type.equals("long")) { return "Long"; } else if (type.equals("double")) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticColumn.txt b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticColumn.txt index 0e56721..0f4bc67 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticColumn.txt +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticColumn.txt @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; import org.apache.hadoop.hive.ql.exec.vector.*; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -47,7 +48,6 @@ public class extends VectorExpression { int n = batch.size; [] vector1 = inputColVector1.vector; [] vector2 = inputColVector2.vector; - [] outputVector = outputColVector.vector; // return immediately if batch is empty @@ -55,81 +55,19 @@ public class extends VectorExpression { return; } - /* Set repeating property to false (the default). - * It will be set to true later if needed later. 
- */ - outputColVector.isRepeating = false; - - //Handle nulls first - if (inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isNull[0] = true; - outputColVector.isRepeating = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector2.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i]; - } - } - } - } else if (!inputColVector1.noNulls && !inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating || inputColVector2.isRepeating) { - //Output will also be repeating and null - outputColVector.isRepeating = true; - outputColVector.isNull[0] = true; - //return as no further processing is needed - return; - } else { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outputColVector.isNull[i] = inputColVector1.isNull[i] || inputColVector2.isNull[i]; - } - } - } - } - - - //Disregard nulls for processing + outputColVector.isRepeating = inputColVector1.isRepeating && inputColVector2.isRepeating; + + // Handle nulls first + NullUtil.propagateNullsColCol( + inputColVector1, inputColVector2, outputColVector, sel, n, batch.selectedInUse); + + /* Disregard nulls for processing. In other words, + * the arithmetic operation is performed even if one or + * more inputs are null. This is to improve speed by avoiding + * conditional checks in the inner loop. + */ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector1[0] vector2[0]; - outputColVector.isRepeating = true; } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -154,7 +92,7 @@ public class extends VectorExpression { } } else { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector1[i] vector2[i]; } @@ -164,6 +102,14 @@ public class extends VectorExpression { } } } + + /* For the case when the output can have null values, follow + * the convention that the data values must be 1 for long and + * NaN for double. This is to prevent possible later zero-divide errors + * in complex arithmetic expressions like col2 / (col1 - 1) + * in the case when some col1 entries are null. 
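 *
 * For example (illustrative only, not part of the generated code): if col1 is null
 * in some row, the raw result of (col1 - 1) in that row can be anything, including 0,
 * e.g. when the null input slot itself already holds the long placeholder value 1:
 *
 *   long nullInputSlot = LongColumnVector.NULL_VALUE;  // col1's null slot holds 1
 *   long rawDifference = nullInputSlot - 1;            // raw arithmetic yields 0
 *   long fixedSlot = LongColumnVector.NULL_VALUE;      // what the fix-up stores instead
 *   long safeQuotient = 100L / fixedSlot;              // stand-in for col2 / (col1 - 1); no ArithmeticException
 *
 * Only the data slot is rewritten; the row is still reported as NULL through isNull[].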
+ */ + NullUtil.setNullDataEntries(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticScalar.txt b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticScalar.txt index 474a979..15fdbad 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticScalar.txt +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ColumnArithmeticScalar.txt @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; public class extends VectorExpression { private int colNum; @@ -47,6 +48,7 @@ public class extends VectorExpression { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; @@ -57,15 +59,13 @@ public class extends VectorExpression { } if (inputColVector.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. outputVector[0] = vector[0] value; + // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] value; } @@ -74,10 +74,9 @@ public class extends VectorExpression { outputVector[i] = vector[i] value; } } - outputColVector.isRepeating = false; } else /* there are nulls */ { if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] value; outputIsNull[i] = inputIsNull[i]; @@ -88,8 +87,9 @@ public class extends VectorExpression { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt index 9899e7e..0f82f5b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/ScalarArithmeticColumn.txt @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; /** * Implements a vectorized arithmetic operator with a scalar on the left and a @@ -62,6 +63,7 @@ public class extends VectorExpression { boolean[] inputIsNull = inputColVector.isNull; boolean[] outputIsNull = outputColVector.isNull; outputColVector.noNulls = inputColVector.noNulls; + 
outputColVector.isRepeating = inputColVector.isRepeating; int n = batch.size; [] vector = inputColVector.vector; [] outputVector = outputColVector.vector; @@ -72,16 +74,10 @@ public class extends VectorExpression { } if (inputColVector.isRepeating) { - - /* - * All must be selected otherwise size would be zero - * Repeating property will not change. - */ outputVector[0] = value vector[0]; // Even if there are no nulls, we always copy over entry 0. Simplifies code. outputIsNull[0] = inputIsNull[0]; - outputColVector.isRepeating = true; } else if (inputColVector.noNulls) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -93,7 +89,6 @@ public class extends VectorExpression { outputVector[i] = value vector[i]; } } - outputColVector.isRepeating = false; } else { /* there are nulls */ if (batch.selectedInUse) { for(int j = 0; j != n; j++) { @@ -107,8 +102,9 @@ public class extends VectorExpression { } System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); } - outputColVector.isRepeating = false; } + + NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } @Override diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java index 42872b2..9efac53 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorArithmeticExpressions.java @@ -61,22 +61,25 @@ private VectorizedRowBatch getVectorizedRowBatchSingleLongVector(int size) { @Test public void testLongColAddLongScalarWithNulls() { - VectorizedRowBatch vrg = getVectorizedRowBatchSingleLongVector + VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector (VectorizedRowBatch.DEFAULT_SIZE); - LongColumnVector lcv = (LongColumnVector) vrg.cols[0]; + LongColumnVector lcv = (LongColumnVector) batch.cols[0]; + LongColumnVector lcvOut = (LongColumnVector) batch.cols[1]; TestVectorizedRowBatch.addRandomNulls(lcv); LongColAddLongScalar expr = new LongColAddLongScalar(0, 23, 1); - expr.evaluate(vrg); - //verify + expr.evaluate(batch); + + // verify for (int i=0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { if (!lcv.isNull[i]) { - Assert.assertEquals(i*37+23, ((LongColumnVector)vrg.cols[1]).vector[i]); + Assert.assertEquals(i*37+23, lcvOut.vector[i]); } else { - Assert.assertTrue(((LongColumnVector)vrg.cols[1]).isNull[i]); + Assert.assertTrue(lcvOut.isNull[i]); } } - Assert.assertFalse(((LongColumnVector)vrg.cols[1]).noNulls); - Assert.assertFalse(((LongColumnVector)vrg.cols[1]).isRepeating); + Assert.assertFalse(lcvOut.noNulls); + Assert.assertFalse(lcvOut.isRepeating); + verifyLongNullDataVectorEntries(lcvOut, batch.selected, batch.selectedInUse, batch.size); } @Test @@ -117,6 +120,36 @@ public void testLongColAddLongScalarWithRepeating() { Assert.assertTrue(out.isRepeating); Assert.assertFalse(out.noNulls); Assert.assertEquals(true, out.isNull[0]); + verifyLongNullDataVectorEntries(out, batch.selected, batch.selectedInUse, batch.size); + } + + /* Make sure all the NULL entries in this long column output vector have their data vector + * element set to the correct value, as per the specification, to prevent later arithmetic + * errors (e.g. zero-divide). 
+ */ + public static void verifyLongNullDataVectorEntries( + LongColumnVector v, int[] sel, boolean selectedInUse, int n) { + if (n == 0 || v.noNulls) { + return; + } else if (v.isRepeating) { + if (v.isNull[0]) { + assertEquals(LongColumnVector.NULL_VALUE, v.vector[0]); + } + } + else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (v.isNull[i]) { + assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]); + } + } + } else { + for (int i = 0; i != n; i++) { + if (v.isNull[i]) { + assertEquals(LongColumnVector.NULL_VALUE, v.vector[i]); + } + } + } } @Test @@ -138,7 +171,7 @@ public void testLongColAddLongColumn() { } assertTrue(lcv2.noNulls); - //Now set one column nullable + // Now set one column nullable lcv1.noNulls = false; lcv1.isNull[1] = true; lcv2.isRepeating = true; // set output isRepeating to true to make sure it gets over-written @@ -147,8 +180,9 @@ public void testLongColAddLongColumn() { assertTrue(lcv2.isNull[1]); assertFalse(lcv2.noNulls); assertFalse(lcv2.isRepeating); + verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size); - //Now set other column nullable too + // Now set other column nullable too lcv0.noNulls = false; lcv0.isNull[1] = true; lcv0.isNull[3] = true; @@ -156,8 +190,9 @@ public void testLongColAddLongColumn() { assertTrue(lcv2.isNull[1]); assertTrue(lcv2.isNull[3]); assertFalse(lcv2.noNulls); + verifyLongNullDataVectorEntries(lcv2, vrg.selected, vrg.selectedInUse, vrg.size); - //Now test with repeating flag + // Now test with repeating flag lcv3.isRepeating = true; LongColAddLongColumn expr2 = new LongColAddLongColumn(3, 4, 5); expr2.evaluate(vrg); @@ -165,14 +200,15 @@ public void testLongColAddLongColumn() { assertEquals(seed * ( 4 + 5*(i+1)), lcv5.vector[i]); } - //Repeating with other as nullable + // Repeating with other as nullable lcv4.noNulls = false; lcv4.isNull[0] = true; expr2.evaluate(vrg); assertTrue(lcv5.isNull[0]); assertFalse(lcv5.noNulls); + verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size); - //Repeating null value + // Repeating null value lcv3.isRepeating = true; lcv3.noNulls = false; lcv3.isNull[0] = true; @@ -180,5 +216,18 @@ public void testLongColAddLongColumn() { assertFalse(lcv5.noNulls); assertTrue(lcv5.isRepeating); assertTrue(lcv5.isNull[0]); + verifyLongNullDataVectorEntries(lcv5, vrg.selected, vrg.selectedInUse, vrg.size); + + // Neither input has nulls. Verify that this propagates to output. 
+ vrg.selectedInUse = false; + lcv0.noNulls = true; + lcv1.noNulls = true; + lcv0.isRepeating = false; + lcv1.isRepeating = false; + lcv2.noNulls = false; // set output noNulls to false to make sure it gets over-written (to true) + lcv2.isRepeating = true; // similarly with isRepeating + expr.evaluate(vrg); + assertTrue(lcv2.noNulls); + assertFalse(lcv2.isRepeating); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java index a58f837..257b3a1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorScalarColArithmetic.java @@ -105,6 +105,8 @@ public void testLongScalarSubtractLongColWithNulls() { } Assert.assertFalse(((LongColumnVector)batch.cols[1]).noNulls); Assert.assertFalse(((LongColumnVector)batch.cols[1]).isRepeating); + TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries( + (LongColumnVector) batch.cols[1], batch.selected, batch.selectedInUse, batch.size); } @Test @@ -147,5 +149,7 @@ public void testLongScalarSubtractLongColWithRepeating() { Assert.assertTrue(out.isRepeating); Assert.assertFalse(out.noNulls); Assert.assertEquals(true, out.isNull[0]); + TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries( + out, batch.selected, batch.selectedInUse, batch.size); } }
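
// Illustrative sketch, not part of the patch: the shape any additional operator test
// can follow using the verifyLongNullDataVectorEntries helper added above. The batch
// setup line is a stand-in for whatever construction the test class already provides;
// LongColAddLongScalar(0, 23, 1) is the same expression the existing tests exercise.
VectorizedRowBatch batch = getVectorizedRowBatchSingleLongVector(VectorizedRowBatch.DEFAULT_SIZE);
LongColumnVector in = (LongColumnVector) batch.cols[0];
LongColumnVector out = (LongColumnVector) batch.cols[1];
TestVectorizedRowBatch.addRandomNulls(in);
new LongColAddLongScalar(0, 23, 1).evaluate(batch);
Assert.assertFalse(out.noNulls);                       // nulls propagated from the input
TestVectorArithmeticExpressions.verifyLongNullDataVectorEntries(
    out, batch.selected, batch.selectedInUse, batch.size);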