diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt index d75aadf..d43e044 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions; +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -124,6 +124,11 @@ public class extends VectorExpression { } } } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } @Override diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt index 22106f2..5515c5e 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions; +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -78,7 +78,7 @@ public class extends VectorExpression { if (vector1[0] == 1) { arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { - outputColVector.fill(arg3Scalar, batch.selectedInUse, sel, n); + outputColVector.fill(arg3Scalar); } return; } @@ -116,6 +116,9 @@ public class extends VectorExpression { } } } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); } @Override diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt index 4a7a576..4dae9a2 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -16,7 +16,7 @@ * limitations under the License. */ -package org.apache.hadoop.hive.ql.exec.vector.expressions; +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -76,7 +76,7 @@ public class extends VectorExpression { if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar, batch.selectedInUse, sel, n); + outputColVector.fill(arg2Scalar); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } @@ -118,6 +118,9 @@ public class extends VectorExpression { } } } + + // restore repeating and no nulls indicators + arg3ColVector.unFlatten(); } @Override diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt index ef30d1c..692a1d1 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package org.apache.hadoop.hive.ql.exec.vector.expressions; +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -75,9 +75,9 @@ public class extends VectorExpression { if (arg1ColVector.isRepeating) { if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar, batch.selectedInUse, sel, n); + outputColVector.fill(arg2Scalar); } else { - outputColVector.fill(arg3Scalar, batch.selectedInUse, sel, n); + outputColVector.fill(arg3Scalar); } } else if (arg1ColVector.noNulls) { if (batch.selectedInUse) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index e1d4543..a10feb7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -219,4 +221,93 @@ public Writable getWritableObject(int index) { } return result; } + + /** Copy the current object contents into the output. Only copy selected entries, + * as indicated by selectedInUse and the sel array. + */ + public void copySelected( + boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.setVal(0, vector[0], start[0], length[0]); + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.setVal(i, vector[i], start[i], length[i]); + } + } + else { + for (int i = 0; i < size; i++) { + output.setVal(i, vector[i], start[i], length[i]); + } + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + /** Simplify vector by brute-force flattening noNulls and isRepeating + * This can be used to reduce combinatorial explosion of code paths in VectorExpressions + * with many arguments, at the expense of loss of some performance. + */ + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + + // setRef is used below and this is safe, because the reference + // is to data owned by this column vector. If this column vector + // gets re-used, the whole thing is re-used together so there + // is no danger of a dangling reference. + + // Only copy data values if entry is not null. The string value + // at position 0 is undefined if the position 0 value is null. 
+ if (noNulls || (!noNulls && !isNull[0])) { + + // loops start at position 1 because position 0 is already set + if (selectedInUse) { + for (int j = 1; j < size; j++) { + int i = sel[j]; + this.setRef(i, vector[0], start[0], length[0]); + } + } else { + for (int i = 1; i < size; i++) { + this.setRef(i, vector[0], start[0], length[0]); + } + } + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + // Fill all the vector entries with the provided value + public void fill(byte[] value) { + noNulls = true; + isRepeating = true; + setRef(0, value, 0, value.length); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 48b87ea..9d8b2de 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -49,6 +49,11 @@ * If so, vector[0] holds the repeating value. */ public boolean isRepeating; + + // Variables to hold state from before flattening so it can be easily restored. + private boolean preFlattenIsRepeating; + private boolean preFlattenNoNulls; + public abstract Writable getWritableObject(int index); /** @@ -76,5 +81,66 @@ public void reset() { noNulls = true; isRepeating = false; } + + abstract public void flatten(boolean selectedInUse, int[] sel, int size); + + // Simplify vector by brute-force flattening noNulls if isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. + public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) { + + boolean nullFillValue; + + if (noNulls) { + nullFillValue = false; + } else { + nullFillValue = isNull[0]; + } + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = nullFillValue; + } + } else { + Arrays.fill(isNull, 0, size, nullFillValue); + } + + // all nulls are now explicit + noNulls = false; + } + + public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) { + if (noNulls) { + noNulls = false; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = false; + } + } else { + Arrays.fill(isNull, 0, size, false); + } + } + } + + /** + * Restore the state of isRepeating and noNulls to what it was + * before flattening. This must only be called just after flattening + * and then evaluating a VectorExpression on the column vector. + * It is an optimization that allows other operations on the same + * column to continue to benefit from the isRepeating and noNulls + * indicators. + */ + public void unFlatten() { + isRepeating = preFlattenIsRepeating; + noNulls = preFlattenNoNulls; + } + + // Record repeating and no nulls state to be restored later.
+ protected void flattenPush() { + preFlattenIsRepeating = isRepeating; + preFlattenNoNulls = noNulls; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index de997f8..cb23129 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -83,6 +83,7 @@ public void copySelected( if (isRepeating) { output.vector[0] = vector[0]; output.isNull[0] = isNull[0]; + output.isRepeating = true; return; } @@ -113,24 +114,18 @@ public void copySelected( } } - // Fill the selected array entries with provided value - public void fill(double value, boolean selectedInUse, int[] sel, int size) { + // Fill the column vector with the provided value + public void fill(double value) { noNulls = true; - isRepeating = false; - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - vector[i] = value; - } - } else { - Arrays.fill(vector, 0, size, value); - } + isRepeating = true; + vector[0] = value; } + // Simplify vector by brute-force flattening noNulls and isRepeating - // This can be used to reduce combinatorial explosion in VectorExpressions + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions // with many arguments. public void flatten(boolean selectedInUse, int[] sel, int size) { - + flattenPush(); if (isRepeating) { isRepeating = false; double repeatVal = vector[0]; @@ -142,30 +137,8 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { } else { Arrays.fill(vector, 0, size, repeatVal); } - - if (!noNulls && isNull[0]) { - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - isNull[i] = true; - } - } else { - Arrays.fill(isNull, 0, size, true); - } - } - } - - if (noNulls) { - noNulls = false; - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - isNull[i] = false; - } - } else { - Arrays.fill(isNull, 0, size, false); - } + flattenRepeatingNulls(selectedInUse, sel, size); } + flattenNoNulls(selectedInUse, sel, size); } - } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index eb8c328..aa05b19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -83,6 +83,7 @@ public void copySelected( if (isRepeating) { output.vector[0] = vector[0]; output.isNull[0] = isNull[0]; + output.isRepeating = true; return; } @@ -126,6 +127,7 @@ public void copySelected( if (isRepeating) { output.vector[0] = vector[0]; // automatic conversion to double is done here output.isNull[0] = isNull[0]; + output.isRepeating = true; return; } @@ -156,25 +158,18 @@ public void copySelected( } } - // Fill the selected array entries with provided value - public void fill(long value, boolean selectedInUse, int[] sel, int size) { + // Fill the column vector with the provided value + public void fill(long value) { noNulls = true; - isRepeating = false; - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - vector[i] = value; - } - } else { - Arrays.fill(vector, 0, size, value); - } + isRepeating = true; + vector[0] = value; } // Simplify vector by brute-force flattening noNulls and isRepeating - // This can be used to reduce combinatorial explosion in 
VectorExpressions + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions // with many arguments. public void flatten(boolean selectedInUse, int[] sel, int size) { - + flattenPush(); if (isRepeating) { isRepeating = false; long repeatVal = vector[0]; @@ -186,29 +181,8 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { } else { Arrays.fill(vector, 0, size, repeatVal); } - - if (!noNulls && isNull[0]) { - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - isNull[i] = true; - } - } else { - Arrays.fill(isNull, 0, size, true); - } - } - } - - if (noNulls) { - noNulls = false; - if (selectedInUse) { - for (int j = 0; j < size; j++) { - int i = sel[j]; - isNull[i] = false; - } - } else { - Arrays.fill(isNull, 0, size, false); - } + flattenRepeatingNulls(selectedInUse, sel, size); } + flattenNoNulls(selectedInUse, sel, size); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java new file mode 100644 index 0000000..c321ad0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are string columns or string expression results. 
+ */ +public class IfExprStringColumnStringColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column, arg3Column; + private int outputColumn; + + public IfExprStringColumnStringColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprStringColumnStringColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; + BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even if neither arg2 nor arg3 + * have nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg2Column() { + return arg2Column; + } + + public void setArg2Column(int colNum) { + this.arg2Column = colNum; + } + + public int getArg3Column() { + return arg3Column; + } + + public void setArg3Column(int colNum) { + this.arg3Column = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java new file mode 100644 index 0000000..627319a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. 
+ * The first is always a boolean (LongColumnVector). + * The second is a string column expression. + * The third is a string scalar. + */ +public class IfExprStringColumnStringScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column; + private byte[] arg3Scalar; + private int outputColumn; + + public IfExprStringColumnStringScalar(int arg1Column, int arg2Column, byte[] arg3Scalar, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprStringColumnStringScalar() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even if arg2 has no nulls. + * This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } + } + + // restore state of repeating and non nulls indicators + arg2ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg2Column() { + return arg2Column; + } + + public void setArg2Column(int colNum) { + this.arg2Column = colNum; + } + + public byte[] getArg3Scalar() { + return arg3Scalar; + } + + public void setArg3Scalar(byte[] value) { + this.arg3Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java new file mode 100644 index 0000000..37636e2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). 
+ * The second is a string scalar. + * The third is a string column or non-constant expression result. + */ +public class IfExprStringScalarStringColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg3Column; + private byte[] arg2Scalar; + private int outputColumn; + + public IfExprStringScalarStringColumn(int arg1Column, byte[] arg2Scalar, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprStringScalarStringColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even if arg3 has no + * nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the input + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+ false : arg3ColVector.isNull[i]); + } + } + } + + // restore state of repeating and non nulls indicators + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public byte[] getArg2Scalar() { + return arg2Scalar; + } + + public void setArg2Scalar(byte[] value) { + this.arg2Scalar = value; + } + + public int getArg3Column() { + return arg3Column; + } + + public void setArg3Column(int colNum) { + this.arg3Column = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java new file mode 100644 index 0000000..f6fcfea --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string scalar. 
+ */ +public class IfExprStringScalarStringScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column; + private byte[] arg2Scalar; + private byte[] arg3Scalar; + private int outputColumn; + + public IfExprStringScalarStringScalar( + int arg1Column, byte[] arg2Scalar, byte[] arg3Scalar, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprStringScalarStringScalar() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + outputColVector.noNulls = true; // output must be a scalar and neither one is null + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public byte[] getArg2Scalar() { + return arg2Scalar; + } + + public void setArg2Scalar(byte[] value) { + this.arg2Scalar = value; + } + + public byte[] getArg3Scalar() { + return arg3Scalar; + } + + public void setArg3Scalar(byte[] value) { + this.arg3Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( +
VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index ceedd97..adf55c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -27,20 +27,24 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; /** * IF(expr1,expr2,expr3)
@@ -48,13 +52,16 @@ * otherwise it returns expr3. IF() returns a numeric or string value, depending * on the context in which it is used. */ -@VectorizedExpressions({IfExprLongColumnLongColumn.class, IfExprDoubleColumnDoubleColumn.class, +@VectorizedExpressions({ + IfExprLongColumnLongColumn.class, IfExprDoubleColumnDoubleColumn.class, IfExprLongColumnLongScalar.class, IfExprDoubleColumnDoubleScalar.class, IfExprLongColumnDoubleScalar.class, IfExprDoubleColumnLongScalar.class, IfExprLongScalarLongColumn.class, IfExprDoubleScalarDoubleColumn.class, IfExprLongScalarDoubleColumn.class, IfExprDoubleScalarLongColumn.class, IfExprLongScalarLongScalar.class, IfExprDoubleScalarDoubleScalar.class, - IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class + IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class, + IfExprStringColumnStringColumn.class, IfExprStringColumnStringScalar.class, + IfExprStringScalarStringColumn.class, IfExprStringScalarStringScalar.class }) public class GenericUDFIf extends GenericUDF { private transient ObjectInspector[] argumentOIs; @@ -117,5 +124,4 @@ public String getDisplayString(String[] children) { sb.append(children[2]).append(")"); return sb.toString(); } - } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 88437f1..5f6308c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -36,6 +36,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol; @@ -53,15 +57,15 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarLongColumn; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween; @@ -1010,8 +1014,8 @@ public void testInFilters() throws HiveException { /** * Test that correct VectorExpression classes are chosen for the * IF (expr1, expr2, expr3) conditional expression for integer, float, - * boolean and timestamp input types. expr1 is always an input column expression of type - * long. expr2 and expr3 can be column expressions or constants of other types + * boolean, timestamp and string input types. expr1 is always an input column expression + * of type long. expr2 and expr3 can be column expressions or constants of other types * but must have the same type. */ @Test @@ -1137,19 +1141,46 @@ public void testIfConditionalExprs() throws HiveException { ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongColumnLongColumn); - // long column/scalar IF + // column/scalar IF children1.set(2, new ExprNodeConstantDesc(true)); ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongColumnLongScalar); - // long scalar/scalar IF + // scalar/scalar IF children1.set(1, new ExprNodeConstantDesc(true)); ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongScalarLongScalar); - // long scalar/column IF + // scalar/column IF children1.set(2, col3Expr); ve = vc.getVectorExpression(exprDesc); assertTrue(ve instanceof IfExprLongScalarLongColumn); + + // test for string type + constDesc2 = new ExprNodeConstantDesc("Alpha"); + constDesc3 = new ExprNodeConstantDesc("Bravo"); + col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(String.class, "col3", "table", false); + + // column/column + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringColumnStringColumn); + + // column/scalar + children1.set(2, constDesc3); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringColumnStringScalar); + + // scalar/scalar + children1.set(1, constDesc2); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringScalarStringScalar); + + // scalar/column + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringScalarStringColumn); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java index a250c9d..51a73c1 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java @@ -218,4 +218,64 @@ public static void setRepeatingDoubleCol(DoubleColumnVector col) { col.isRepeating = true; col.vector[0] = 50.0; } + + @Test + public void testFlatten() { + verifyFlatten(new LongColumnVector()); + verifyFlatten(new DoubleColumnVector()); + verifyFlatten(new BytesColumnVector()); + } + + private void verifyFlatten(ColumnVector v) { + + // verify that flattening and unflattenting no-nulls works + v.noNulls = true; + v.isNull[1] = true; + int[] sel = {0, 2}; + int size = 2; + v.flatten(true, sel, size); + Assert.assertFalse(v.noNulls); + Assert.assertFalse(v.isNull[0] || v.isNull[2]); + v.unFlatten(); + Assert.assertTrue(v.noNulls); + + // verify that flattening and unflattening "isRepeating" works + v.isRepeating = true; + v.noNulls = false; + v.isNull[0] = true; + v.flatten(true, sel, 2); + Assert.assertFalse(v.noNulls); + Assert.assertTrue(v.isNull[0] && v.isNull[2]); + Assert.assertFalse(v.isRepeating); + v.unFlatten(); + Assert.assertFalse(v.noNulls); + Assert.assertTrue(v.isRepeating); + + // verify extension of values in the array + v.noNulls = true; + if (v instanceof LongColumnVector) { + ((LongColumnVector) v).vector[0] = 100; + v.flatten(true, sel, 2); + Assert.assertTrue(((LongColumnVector) v).vector[2] == 100); + } else if (v instanceof DoubleColumnVector) { + ((DoubleColumnVector) v).vector[0] = 200d; + v.flatten(true, sel, 2); + Assert.assertTrue(((DoubleColumnVector) v).vector[2] == 200d); + } else if (v instanceof BytesColumnVector) { + BytesColumnVector bv = (BytesColumnVector) v; + byte[] b = null; + try { + b = "foo".getBytes("UTF-8"); + } catch (Exception e) { + ; // eat it + } + bv.setRef(0, b, 0, b.length); + bv.flatten(true, sel, 2); + Assert.assertEquals(bv.vector[0], bv.vector[2]); + Assert.assertEquals(bv.start[0], bv.start[2]); + Assert.assertEquals(bv.length[0], bv.length[2]); + } + } + + } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java index 7eb3b24..3914245 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -19,24 +19,23 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import static org.junit.Assert.*; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.util.Arrays; -import junit.framework.Assert; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongScalar; -import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; import org.junit.Test; @@ -72,7 +71,7 @@ private VectorizedRowBatch getBatch4LongVectors() { v.vector[3] = 4; batch.cols[2] = v; - // set output colum + // set output column batch.cols[3] = new LongColumnVector(); batch.size = 4; @@ -106,13 +105,76 @@ private VectorizedRowBatch getBatch1Long3DoubleVectors() { v.vector[3] = 4; batch.cols[2] = v; - // set output colum + // set output column batch.cols[3] = new DoubleColumnVector(); batch.size = 4; return batch; } + private VectorizedRowBatch getBatch1Long3BytesVectors() { + VectorizedRowBatch batch = new VectorizedRowBatch(4); + LongColumnVector lv = new LongColumnVector(); + + // set first argument to IF -- boolean flag + lv.vector[0] = 0; + lv.vector[1] = 0; + lv.vector[2] = 1; + lv.vector[3] = 1; + batch.cols[0] = lv; + + // set second argument to IF + BytesColumnVector v = new BytesColumnVector(); + v.initBuffer(); + setString(v, 0, "arg2_0"); + setString(v, 1, "arg2_1"); + setString(v, 2, "arg2_2"); + setString(v, 3, "arg2_3"); + + batch.cols[1] = v; + + // set third argument to IF + v = new BytesColumnVector(); + v.initBuffer(); + setString(v, 0, "arg3_0"); + setString(v, 1, "arg3_1"); + setString(v, 2, "arg3_2"); + setString(v, 3, "arg3_3"); + batch.cols[2] = v; + + // set output column + v = new BytesColumnVector(); + v.initBuffer(); + batch.cols[3] = v; + batch.size = 4; + return batch; + } + + private void setString(BytesColumnVector v, int i, String s) { + byte[] b = getUTF8Bytes(s); + v.setVal(i, b, 0, b.length); + } + + private byte[] getUTF8Bytes(String s) { + byte[] b = null; + try { + b = s.getBytes("UTF-8"); + } catch (Exception e) { + ; // eat it + } + return b; + } + + private String getString(BytesColumnVector v, int i) { + String s = null; + try { + s = new String(v.vector[i], v.start[i], v.length[i], "UTF-8"); + } catch (Exception e) { + ; // eat it + } + return s; + } + @Test public void testLongColumnColumnIfExpr() { VectorizedRowBatch batch = getBatch4LongVectors(); @@ -322,4 +384,134 @@ public void testDoubleColumnScalarIfExpr() { assertEquals(true, -3d == r.vector[2]); assertEquals(true, -4d == r.vector[3]); } + + @Test + public void testIfExprStringColumnStringColumn() { + 
VectorizedRowBatch batch = getBatch1Long3BytesVectors(); + VectorExpression expr = new IfExprStringColumnStringColumn(0, 1, 2, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + + // test first IF argument repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[0].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg3_2")); + assertTrue(getString(r, 3).equals("arg3_3")); + + // test second IF argument repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[1].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg2_0")); + assertTrue(getString(r, 3).equals("arg2_0")); + + // test third IF argument repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[2].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_0")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + + // test second IF argument with nulls + batch = getBatch1Long3BytesVectors(); + batch.cols[1].noNulls = false; + batch.cols[1].isNull[2] = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(!r.noNulls && r.isNull[2]); + assertTrue(getString(r, 3).equals("arg2_3")); + assertFalse(r.isNull[0] || r.isNull[1] || r.isNull[3]); + + // test third IF argument with nulls + batch = getBatch1Long3BytesVectors(); + batch.cols[2].noNulls = false; + batch.cols[2].isNull[0] = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(!r.noNulls && r.isNull[0]); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + assertFalse(r.isNull[1] || r.isNull[2] || r.isNull[3]); + + // test second IF argument with nulls and repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[1].noNulls = false; + batch.cols[1].isNull[0] = true; + batch.cols[1].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(!r.noNulls && r.isNull[2]); + assertTrue(!r.noNulls && r.isNull[3]); + assertFalse(r.isNull[0] || r.isNull[1]); + } + + @Test + public void testIfExprStringColumnStringScalar() { + VectorizedRowBatch batch = getBatch1Long3BytesVectors(); + byte[] scalar = getUTF8Bytes("scalar"); + VectorExpression expr = new IfExprStringColumnStringScalar(0, 1, scalar, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("scalar")); + assertTrue(getString(r, 1).equals("scalar")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + } + + @Test + public void testIfExprStringScalarStringColumn() { + VectorizedRowBatch batch = 
getBatch1Long3BytesVectors(); + byte[] scalar = getUTF8Bytes("scalar"); + VectorExpression expr = new IfExprStringScalarStringColumn(0,scalar, 2, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("scalar")); + assertTrue(getString(r, 3).equals("scalar")); + } + + @Test + public void testIfExprStringScalarStringScalar() { + + // standard case + VectorizedRowBatch batch = getBatch1Long3BytesVectors(); + byte[] scalar1 = getUTF8Bytes("scalar1"); + byte[] scalar2 = getUTF8Bytes("scalar2"); + VectorExpression expr = new IfExprStringScalarStringScalar(0,scalar1, scalar2, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("scalar2")); + assertTrue(getString(r, 1).equals("scalar2")); + assertTrue(getString(r, 2).equals("scalar1")); + assertTrue(getString(r, 3).equals("scalar1")); + assertFalse(r.isRepeating); + + // repeating case for first (boolean flag) argument to IF + batch = getBatch1Long3BytesVectors(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + r = (BytesColumnVector) batch.cols[3]; + assertTrue(r.isRepeating); + assertTrue(getString(r, 0).equals("scalar2")); + } }
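Illustrative sketch (not part of the patch): the flatten()/unFlatten() contract added to ColumnVector above is what the new string IF expressions rely on inside evaluate(). The fragment below assumes only the classes shown in this patch and mirrors that flatten -> evaluate -> unFlatten pattern on a repeating BytesColumnVector.

    // Minimal sketch of the flatten()/unFlatten() lifecycle.
    BytesColumnVector col = new BytesColumnVector();   // default constructor sizes arrays for a full batch
    byte[] bytes = new byte[] {'a', 'b', 'c'};
    col.setRef(0, bytes, 0, bytes.length);
    col.isRepeating = true;                            // one logical value for every row
    int size = 4;                                      // pretend the batch holds 4 rows
    col.flatten(false, null, size);                    // sel is ignored because selectedInUse == false;
                                                       // entries 1..size-1 now reference the same bytes and nulls are explicit
    // an expression can now read col.vector[i], col.start[i], col.length[i], col.isNull[i] for any i < size
    col.unFlatten();                                   // restores the isRepeating/noNulls flags saved by flattenPush()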