diff --git a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
index a15eca0..df28b6c 100644
--- a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
+++ b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
@@ -385,9 +385,26 @@
       // See org.apache.hadoop.hive.ql.exec.vector.expressions for remaining cast VectorExpression
       // classes
 
-      {"ColumnUnaryMinus", "long"},
-      {"ColumnUnaryMinus", "double"},
-
+      {"ColumnUnaryMinus", "long"},
+      {"ColumnUnaryMinus", "double"},
+
+      // IF conditional expression
+      // IfExprColumnColumn: template, <OperandType> (shared by arg2, arg3 and the result)
+      // all other IfExpr templates: template, <OperandType2> (arg2), <OperandType3> (arg3)
+      {"IfExprColumnColumn", "long"},
+      {"IfExprColumnColumn", "double"},
+      {"IfExprColumnScalar", "long", "long"},
+      {"IfExprColumnScalar", "double", "long"},
+      {"IfExprColumnScalar", "long", "double"},
+      {"IfExprColumnScalar", "double", "double"},
+      {"IfExprScalarColumn", "long", "long"},
+      {"IfExprScalarColumn", "double", "long"},
+      {"IfExprScalarColumn", "long", "double"},
+      {"IfExprScalarColumn", "double", "double"},
+      {"IfExprScalarScalar", "long", "long"},
+      {"IfExprScalarScalar", "double", "long"},
+      {"IfExprScalarScalar", "long", "double"},
+      {"IfExprScalarScalar", "double", "double"},
 
       // template, , , , ,
       {"VectorUDAFMinMax", "VectorUDAFMinLong", "long", "<", "min",
@@ -557,6 +574,14 @@ private void generate() throws Exception {
         generateFilterStringColumnCompareColumn(tdesc);
       } else if (tdesc[0].equals("StringColumnCompareColumn")) {
         generateStringColumnCompareColumn(tdesc);
+      } else if (tdesc[0].equals("IfExprColumnColumn")) {
+        generateIfExprColumnColumn(tdesc);
+      } else if (tdesc[0].equals("IfExprColumnScalar")) {
+        generateIfExprColumnScalar(tdesc);
+      } else if (tdesc[0].equals("IfExprScalarColumn")) {
+        generateIfExprScalarColumn(tdesc);
+      } else if (tdesc[0].equals("IfExprScalarScalar")) {
+        generateIfExprScalarScalar(tdesc);
       } else {
         continue;
       }
@@ -792,6 +817,110 @@ private void generateColumnUnaryMinus(String[] tdesc) throws IOException {
     writeFile(outputFile, templateString);
   }
 
+  /**
+   * Generates IfExpr{Type}Column{Type}Column from the IfExprColumnColumn template.
+   * arg2, arg3 and the result all share the single operand type.
+   *
+   * @param tdesc template descriptor: {template name, operand type}
+   * @throws IOException propagated from reading the template or writing the class file
+   */
+  private void generateIfExprColumnColumn(String[] tdesc) throws IOException {
+    String operandType = tdesc[1];
+    String inputColumnVectorType = this.getColumnVectorType(operandType);
+    String className = "IfExpr" + getCamelCaseType(operandType) + "Column"
+        + getCamelCaseType(operandType) + "Column";
+    String outputFile = joinPath(this.expressionOutputDirectory, className + ".java");
+    String templateFile = joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt");
+    String templateString = readFile(templateFile);
+    // Expand, and write result
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType);
+    templateString = templateString.replaceAll("<OperandType>", operandType);
+    writeFile(outputFile, templateString);
+  }
+
+  /**
+   * Generates IfExpr{Type2}Column{Type3}Scalar from the IfExprColumnScalar template.
+   * The result type is getArithmeticReturnType(arg2 type, arg3 type).
+   *
+   * @param tdesc template descriptor: {template name, arg2 column type, arg3 scalar type}
+   * @throws IOException propagated from reading the template or writing the class file
+   */
+  private void generateIfExprColumnScalar(String[] tdesc) throws IOException {
+    String operandType2 = tdesc[1];
+    String operandType3 = tdesc[2];
+    String arg2ColumnVectorType = this.getColumnVectorType(operandType2);
+    String returnType = getArithmeticReturnType(operandType2, operandType3);
+    String outputColumnVectorType = getColumnVectorType(returnType);
+    String className = "IfExpr" + getCamelCaseType(operandType2) + "Column"
+        + getCamelCaseType(operandType3) + "Scalar";
+    String outputFile = joinPath(this.expressionOutputDirectory, className + ".java");
+    String templateFile = joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt");
+    String templateString = readFile(templateFile);
+    // Expand, and write result
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<Arg2ColumnVectorType>", arg2ColumnVectorType);
+    templateString = templateString.replaceAll("<ReturnType>", returnType);
+    templateString = templateString.replaceAll("<OperandType2>", operandType2);
+    templateString = templateString.replaceAll("<OperandType3>", operandType3);
+    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
+    writeFile(outputFile, templateString);
+  }
+
+  /**
+   * Generates IfExpr{Type2}Scalar{Type3}Column from the IfExprScalarColumn template.
+   * The result type is getArithmeticReturnType(arg2 type, arg3 type).
+   *
+   * @param tdesc template descriptor: {template name, arg2 scalar type, arg3 column type}
+   * @throws IOException propagated from reading the template or writing the class file
+   */
+  private void generateIfExprScalarColumn(String[] tdesc) throws IOException {
+    String operandType2 = tdesc[1];
+    String operandType3 = tdesc[2];
+    String arg3ColumnVectorType = this.getColumnVectorType(operandType3);
+    String returnType = getArithmeticReturnType(operandType2, operandType3);
+    String outputColumnVectorType = getColumnVectorType(returnType);
+    String className = "IfExpr" + getCamelCaseType(operandType2) + "Scalar"
+        + getCamelCaseType(operandType3) + "Column";
+    String outputFile = joinPath(this.expressionOutputDirectory, className + ".java");
+    String templateFile = joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt");
+    String templateString = readFile(templateFile);
+    // Expand, and write result
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<Arg3ColumnVectorType>", arg3ColumnVectorType);
+    templateString = templateString.replaceAll("<ReturnType>", returnType);
+    templateString = templateString.replaceAll("<OperandType2>", operandType2);
+    templateString = templateString.replaceAll("<OperandType3>", operandType3);
+    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
+    writeFile(outputFile, templateString);
+  }
+
+  /**
+   * Generates IfExpr{Type2}Scalar{Type3}Scalar from the IfExprScalarScalar template.
+   * The result type is getArithmeticReturnType(arg2 type, arg3 type).
+   *
+   * @param tdesc template descriptor: {template name, arg2 scalar type, arg3 scalar type}
+   * @throws IOException propagated from reading the template or writing the class file
+   */
+  private void generateIfExprScalarScalar(String[] tdesc) throws IOException {
+    String operandType2 = tdesc[1];
+    String operandType3 = tdesc[2];
+    String returnType = getArithmeticReturnType(operandType2, operandType3);
+    String outputColumnVectorType = getColumnVectorType(returnType);
+    String className = "IfExpr" + getCamelCaseType(operandType2) + "Scalar"
+        + getCamelCaseType(operandType3) + "Scalar";
+    String outputFile = joinPath(this.expressionOutputDirectory, className + ".java");
+    String templateFile = joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt");
+    String templateString = readFile(templateFile);
+    // Expand, and write result
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<ReturnType>", returnType);
+    templateString = templateString.replaceAll("<OperandType2>", operandType2);
+    templateString = templateString.replaceAll("<OperandType3>", operandType3);
+    templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType);
+    writeFile(outputFile, templateString);
+  }
+
   // template, , , , , ,
   private void generateColumnUnaryFunc(String[] tdesc) throws IOException {
     String classNamePrefix = tdesc[1];
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
new file mode 100644
index 0000000..d75aadf
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input column expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second and third are columns (or non-constant expression results) of the same type.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int arg1Column, arg2Column, arg3Column;
+  private int outputColumn;
+
+  public <ClassName>(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
+    this.arg1Column = arg1Column;
+    this.arg2Column = arg2Column;
+    this.arg3Column = arg3Column;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <InputColumnVectorType> arg2ColVector = (<InputColumnVectorType>) batch.cols[arg2Column];
+    <InputColumnVectorType> arg3ColVector = (<InputColumnVectorType>) batch.cols[arg3Column];
+    <InputColumnVectorType> outputColVector = (<InputColumnVectorType>) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls;
+    outputColVector.isRepeating = false; // may override later
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+    <OperandType>[] vector2 = arg2ColVector.vector;
+    <OperandType>[] vector3 = arg3ColVector.vector;
+    <OperandType>[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    /* All the code paths below propagate nulls even if neither arg2 nor arg3
+     * have nulls. This is to reduce the number of code paths and shorten the
+     * code, at the expense of maybe doing unnecessary work if neither input
+     * has nulls. This could be improved in the future by expanding the number
+     * of code paths.
+     */
+    if (arg1ColVector.isRepeating) {
+      // a NULL condition selects arg3, matching the per-row paths below
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      } else {
+        arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      }
+      return;
+    }
+
+    // extend any repeating values and noNulls indicator in the inputs
+    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+    arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
+          outputIsNull[i] = (vector1[i] == 1 ?
+              arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]);
+          outputIsNull[i] = (vector1[i] == 1 ?
+              arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              vector2[i] : vector3[i]);
+          outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              vector2[i] : vector3[i]);
+          outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2ColVector.isNull[i] : arg3ColVector.isNull[i]);
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "<OperandType>";
+  }
+
+  public int getArg1Column() {
+    return arg1Column;
+  }
+
+  public void setArg1Column(int colNum) {
+    this.arg1Column = colNum;
+  }
+
+  public int getArg2Column() {
+    return arg2Column;
+  }
+
+  public void setArg2Column(int colNum) {
+    this.arg2Column = colNum;
+  }
+
+  public int getArg3Column() {
+    return arg3Column;
+  }
+
+  public void setArg3Column(int colNum) {
+    this.arg3Column = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("long"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt
new file mode 100644
index 0000000..22106f2
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a column or non-constant expression result.
+ * The third is a constant value.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int arg1Column, arg2Column;
+  private <OperandType3> arg3Scalar;
+  private int outputColumn;
+
+  public <ClassName>(int arg1Column, int arg2Column, <OperandType3> arg3Scalar,
+      int outputColumn) {
+    this.arg1Column = arg1Column;
+    this.arg2Column = arg2Column;
+    this.arg3Scalar = arg3Scalar;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <Arg2ColumnVectorType> arg2ColVector = (<Arg2ColumnVectorType>) batch.cols[arg2Column];
+    <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2
+    outputColVector.isRepeating = false; // may override later
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+    <OperandType2>[] vector2 = arg2ColVector.vector;
+    <ReturnType>[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (arg1ColVector.isRepeating) {
+      // a NULL condition selects arg3, matching the per-row paths below
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      } else {
+        outputColVector.fill(arg3Scalar, batch.selectedInUse, sel, n);
+      }
+      return;
+    }
+
+    // Extend any repeating values and noNulls indicator in the inputs to
+    // reduce the number of code paths needed below.
+    arg2ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.noNulls) {
+      // rows taken from arg2 can still be NULL, so outputIsNull is written here too
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar);
+          outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar);
+          outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              vector2[i] : arg3Scalar);
+          outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2ColVector.isNull[i] : false);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              vector2[i] : arg3Scalar);
+          outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2ColVector.isNull[i] : false);
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "<ReturnType>";
+  }
+
+  public int getArg1Column() {
+    return arg1Column;
+  }
+
+  public void setArg1Column(int colNum) {
+    this.arg1Column = colNum;
+  }
+
+  public int getArg2Column() {
+    return arg2Column;
+  }
+
+  public void setArg2Column(int colNum) {
+    this.arg2Column = colNum;
+  }
+
+  public <OperandType3> getArg3Scalar() {
+    return arg3Scalar;
+  }
+
+  public void setArg3Scalar(<OperandType3> value) {
+    this.arg3Scalar = value;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("long"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt
new file mode 100644
index 0000000..4a7a576
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt
@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a constant value.
+ * The third is a column or non-constant expression result.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int arg1Column, arg3Column;
+  private <OperandType2> arg2Scalar;
+  private int outputColumn;
+
+  public <ClassName>(int arg1Column, <OperandType2> arg2Scalar, int arg3Column,
+      int outputColumn) {
+    this.arg1Column = arg1Column;
+    this.arg2Scalar = arg2Scalar;
+    this.arg3Column = arg3Column;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <Arg3ColumnVectorType> arg3ColVector = (<Arg3ColumnVectorType>) batch.cols[arg3Column];
+    <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector
+    outputColVector.isRepeating = false; // may override later
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+    <OperandType3>[] vector3 = arg3ColVector.vector;
+    <ReturnType>[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (arg1ColVector.isRepeating) {
+      // a NULL condition selects arg3, matching the per-row paths below
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        outputColVector.fill(arg2Scalar, batch.selectedInUse, sel, n);
+      } else {
+        arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector);
+      }
+      return;
+    }
+
+    // Extend any repeating values and noNulls indicator in the inputs to
+    // reduce the number of code paths needed below.
+    // This could be optimized in the future by having separate paths
+    // for when arg3ColVector is repeating or has no nulls.
+    arg3ColVector.flatten(batch.selectedInUse, sel, n);
+
+    if (arg1ColVector.noNulls) {
+      // rows taken from arg3 can still be NULL, so outputIsNull is written here too
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]);
+          outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]);
+          outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2Scalar : vector3[i]);
+          outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              false : arg3ColVector.isNull[i]);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2Scalar : vector3[i]);
+          outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              false : arg3ColVector.isNull[i]);
+        }
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "<ReturnType>";
+  }
+
+  public int getArg1Column() {
+    return arg1Column;
+  }
+
+  public void setArg1Column(int colNum) {
+    this.arg1Column = colNum;
+  }
+
+  public int getArg3Column() {
+    return arg3Column;
+  }
+
+  public void setArg3Column(int colNum) {
+    this.arg3Column = colNum;
+  }
+
+  public <OperandType2> getArg2Scalar() {
+    return arg2Scalar;
+  }
+
+  public void setArg2Scalar(<OperandType2> value) {
+    this.arg2Scalar = value;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("long"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+  }
+}
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt
new file mode 100644
index 0000000..ef30d1c
--- /dev/null
+++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import java.util.Arrays;
+
+/**
+ * Compute IF(expr1, expr2, expr3) for 3 input expressions.
+ * The first is always a boolean (LongColumnVector).
+ * The second is a constant value.
+ * The third is a constant value.
+ */
+public class <ClassName> extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int arg1Column;
+  private <OperandType2> arg2Scalar;
+  private <OperandType3> arg3Scalar;
+  private int outputColumn;
+
+  public <ClassName>(int arg1Column, <OperandType2> arg2Scalar, <OperandType3> arg3Scalar,
+      int outputColumn) {
+    this.arg1Column = arg1Column;
+    this.arg2Scalar = arg2Scalar;
+    this.arg3Scalar = arg3Scalar;
+    this.outputColumn = outputColumn;
+  }
+
+  public <ClassName>() {
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
+    <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    boolean[] outputIsNull = outputColVector.isNull;
+    outputColVector.noNulls = true; // both possible results are non-null scalars
+    outputColVector.isRepeating = false; // may override later
+    int n = batch.size;
+    long[] vector1 = arg1ColVector.vector;
+    <ReturnType>[] outputVector = outputColVector.vector;
+
+    // return immediately if batch is empty
+    if (n == 0) {
+      return;
+    }
+
+    if (arg1ColVector.isRepeating) {
+      // a NULL condition selects arg3, matching the per-row paths below
+      if ((arg1ColVector.noNulls || !arg1ColVector.isNull[0]) && vector1[0] == 1) {
+        outputColVector.fill(arg2Scalar, batch.selectedInUse, sel, n);
+      } else {
+        outputColVector.fill(arg3Scalar, batch.selectedInUse, sel, n);
+      }
+    } else if (arg1ColVector.noNulls) {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar);
+        }
+      }
+    } else /* there are nulls */ {
+      if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2Scalar : arg3Scalar);
+          outputIsNull[i] = false;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ?
+              arg2Scalar : arg3Scalar);
+        }
+        Arrays.fill(outputIsNull, 0, n, false);
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "<ReturnType>";
+  }
+
+  public int getArg1Column() {
+    return arg1Column;
+  }
+
+  public void setArg1Column(int colNum) {
+    this.arg1Column = colNum;
+  }
+
+  public <OperandType2> getArg2Scalar() {
+    return arg2Scalar;
+  }
+
+  public void setArg2Scalar(<OperandType2> value) {
+    this.arg2Scalar = value;
+  }
+
+  public <OperandType3> getArg3Scalar() {
+    return arg3Scalar;
+  }
+
+  public void setArg3Scalar(<OperandType3> value) {
+    this.arg3Scalar = value;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    return (new VectorExpressionDescriptor.Builder())
+        .setMode(
+            VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(3)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.getType("long"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"),
+            VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>"))
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR,
+            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index d3bb28e..de997f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
@@ -67,4 +69,108 @@ public Writable getWritableObject(int index) {
       return writableObj;
     }
   }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  // A repeating input copies only entry 0 and marks the output as repeating.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Fill the selected array entries with provided value
+  public void fill(double value, boolean selectedInUse, int[] sel, int size) {
+    noNulls = true;
+    isRepeating = false;
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        vector[i] = value;
+      }
+    } else {
+      Arrays.fill(vector, 0, size, value);
+    }
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+
+    if (isRepeating) {
+      isRepeating = false;
+      double repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+
+      if (!noNulls) {
+        // spread entry 0's null flag (set or clear) so no stale isNull values remain
+        boolean repeatIsNull = isNull[0];
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            isNull[i] = repeatIsNull;
+          }
+        } else {
+          Arrays.fill(isNull, 0, size, repeatIsNull);
+        }
+      }
+    }
+
+    if (noNulls) {
+      noNulls = false;
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          isNull[i] = false;
+        }
+      } else {
+        Arrays.fill(isNull, 0, size, false);
+      }
+    }
+  }
+
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index f65e8fa..eb8c328 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector;
 
+import java.util.Arrays;
+
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
@@ -67,4 +69,156 @@ public Writable getWritableObject(int index) {
       return writableObj;
     }
   }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  // A repeating input copies only entry 0 and marks the output as repeating.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      System.arraycopy(vector, 0, output.vector, 0, size);
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Copy the current object contents into the output. Only copy selected entries,
+  // as indicated by selectedInUse and the sel array.
+  // Each long value is widened to double; a repeating input copies only entry 0.
+  public void copySelected(
+      boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+
+    // Output has nulls if and only if input has nulls.
+    output.noNulls = noNulls;
+    output.isRepeating = false;
+
+    // Handle repeating case
+    if (isRepeating) {
+      output.vector[0] = vector[0];  // automatic conversion to double is done here
+      output.isNull[0] = isNull[0];
+      output.isRepeating = true;
+      return;
+    }
+
+    // Handle normal case
+
+    // Copy data values over
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        output.vector[i] = vector[i];
+      }
+    }
+    else {
+      // System.arraycopy throws ArrayStoreException for long[] -> double[],
+      // so convert element by element
+      for (int i = 0; i < size; i++) {
+        output.vector[i] = vector[i];
+      }
+    }
+
+    // Copy nulls over if needed
+    if (!noNulls) {
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          output.isNull[i] = isNull[i];
+        }
+      }
+      else {
+        System.arraycopy(isNull, 0, output.isNull, 0, size);
+      }
+    }
+  }
+
+  // Fill the selected array entries with provided value
+  public void fill(long value, boolean selectedInUse, int[] sel, int size) {
+    noNulls = true;
+    isRepeating = false;
+    if (selectedInUse) {
+      for (int j = 0; j < size; j++) {
+        int i = sel[j];
+        vector[i] = value;
+      }
+    } else {
+      Arrays.fill(vector, 0, size, value);
+    }
+  }
+
+  // Simplify vector by brute-force flattening noNulls and isRepeating
+  // This can be used to reduce combinatorial explosion in VectorExpressions
+  // with many arguments.
+  public void flatten(boolean selectedInUse, int[] sel, int size) {
+
+    if (isRepeating) {
+      isRepeating = false;
+      long repeatVal = vector[0];
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          vector[i] = repeatVal;
+        }
+      } else {
+        Arrays.fill(vector, 0, size, repeatVal);
+      }
+
+      if (!noNulls) {
+        // spread entry 0's null flag (set or clear) so no stale isNull values remain
+        boolean repeatIsNull = isNull[0];
+        if (selectedInUse) {
+          for (int j = 0; j < size; j++) {
+            int i = sel[j];
+            isNull[i] = repeatIsNull;
+          }
+        } else {
+          Arrays.fill(isNull, 0, size, repeatIsNull);
+        }
+      }
+    }
+
+    if (noNulls) {
+      noNulls = false;
+      if (selectedInUse) {
+        for (int j = 0; j < size; j++) {
+          int i = sel[j];
+          isNull[i] = false;
+        }
+      } else {
+        Arrays.fill(isNull, 0, size, false);
+      }
+    }
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 1ed0b6f..9827279 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -61,6 +61,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
@@ -186,6 +187,9 @@ public Vectorizer() {
     supportedGenericUDFs.add(UDFToString.class);
     supportedGenericUDFs.add(GenericUDFTimestamp.class);
 
+    // For conditional expressions
+    supportedGenericUDFs.add(GenericUDFIf.class);
+
     supportedAggregationUdfs.add("min");
     supportedAggregationUdfs.add("max");
supportedAggregationUdfs.add("count"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 0c7e61c..ceedd97 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -21,11 +21,26 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongScalar; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarDoubleScalar; /** * IF(expr1,expr2,expr3)
@@ -33,6 +48,14 @@ * otherwise it returns expr3. IF() returns a numeric or string value, depending * on the context in which it is used. */ +@VectorizedExpressions({IfExprLongColumnLongColumn.class, IfExprDoubleColumnDoubleColumn.class, + IfExprLongColumnLongScalar.class, IfExprDoubleColumnDoubleScalar.class, + IfExprLongColumnDoubleScalar.class, IfExprDoubleColumnLongScalar.class, + IfExprLongScalarLongColumn.class, IfExprDoubleScalarDoubleColumn.class, + IfExprLongScalarDoubleColumn.class, IfExprDoubleScalarLongColumn.class, + IfExprLongScalarLongScalar.class, IfExprDoubleScalarDoubleScalar.class, + IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class +}) public class GenericUDFIf extends GenericUDF { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 9053132..fb1979e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -52,6 +52,15 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween; @@ -95,6 +104,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; @@ -981,4 +991,87 @@ public void testInFilters() throws HiveException { ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterDoubleColumnInList); } + + /** + * Test that correct VectorExpression classes are chosen for the + * IF (expr1, expr2, expr3) conditional expression for long and double + * input types. expr1 is always an input column expression of type long. expr2 + * and expr3 can be long or double column expressions or constants. 
+ */ + @Test + public void testIfConditionalExprs() throws HiveException { + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Long.class, "col2", "table", false); + ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Long.class, "col3", "table", false); + + ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc(new Integer(1)); + ExprNodeConstantDesc constDesc3 = new ExprNodeConstantDesc(new Integer(2)); + + // long column/column IF + GenericUDFIf udf = new GenericUDFIf(); + ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + List children1 = new ArrayList(); + children1.add(col1Expr); + children1.add(col2Expr); + children1.add(col3Expr); + exprDesc.setChildren(children1); + + Map columnMap = new HashMap(); + columnMap.put("col1", 1); + columnMap.put("col2", 2); + columnMap.put("col3", 3); + VectorizationContext vc = new VectorizationContext(columnMap, 3); + VectorExpression ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongColumn); + + // long column/scalar IF + children1.set(2, new ExprNodeConstantDesc(1L)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongScalar); + + // long scalar/scalar IF + children1.set(1, new ExprNodeConstantDesc(1L)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongScalarLongScalar); + + // long scalar/column IF + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongScalarLongColumn); + + // test for double type + col2Expr = new ExprNodeColumnDesc(Double.class, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(Double.class, "col3", "table", false); + + // double column/column IF + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof 
IfExprDoubleColumnDoubleColumn); + + // double column/scalar IF + children1.set(2, new ExprNodeConstantDesc(1D)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleColumnDoubleScalar); + + // double scalar/scalar IF + children1.set(1, new ExprNodeConstantDesc(1D)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleScalarDoubleScalar); + + // double scalar/column IF + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleScalarDoubleColumn); + + // double scalar/long column IF + children1.set(2, new ExprNodeColumnDesc(Long.class, "col3", "table", false)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleScalarLongColumn); + + // Additional combinations of (long,double)X(column,scalar) for each of the second + // and third arguments are omitted. We have coverage of all the source templates + // already. + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java new file mode 100644 index 0000000..7eb3b24 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -0,0 +1,325 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.util.Arrays; +import junit.framework.Assert; + +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprDoubleColumnDoubleScalar; + +import org.junit.Test; + +/** + * Test vectorized conditional expression handling. 
+ */ +public class TestVectorConditionalExpressions { + + private VectorizedRowBatch getBatch4LongVectors() { + VectorizedRowBatch batch = new VectorizedRowBatch(4); + LongColumnVector v = new LongColumnVector(); + + // set first argument to IF -- boolean flag + v.vector[0] = 0; + v.vector[1] = 0; + v.vector[2] = 1; + v.vector[3] = 1; + batch.cols[0] = v; + + // set second argument to IF + v = new LongColumnVector(); + v.vector[0] = -1; + v.vector[1] = -2; + v.vector[2] = -3; + v.vector[3] = -4; + batch.cols[1] = v; + + // set third argument to IF + v = new LongColumnVector(); + v.vector[0] = 1; + v.vector[1] = 2; + v.vector[2] = 3; + v.vector[3] = 4; + batch.cols[2] = v; + + // set output colum + batch.cols[3] = new LongColumnVector(); + + batch.size = 4; + return batch; + } + + private VectorizedRowBatch getBatch1Long3DoubleVectors() { + VectorizedRowBatch batch = new VectorizedRowBatch(4); + LongColumnVector lv = new LongColumnVector(); + + // set first argument to IF -- boolean flag + lv.vector[0] = 0; + lv.vector[1] = 0; + lv.vector[2] = 1; + lv.vector[3] = 1; + batch.cols[0] = lv; + + // set second argument to IF + DoubleColumnVector v = new DoubleColumnVector(); + v.vector[0] = -1; + v.vector[1] = -2; + v.vector[2] = -3; + v.vector[3] = -4; + batch.cols[1] = v; + + // set third argument to IF + v = new DoubleColumnVector(); + v.vector[0] = 1; + v.vector[1] = 2; + v.vector[2] = 3; + v.vector[3] = 4; + batch.cols[2] = v; + + // set output colum + batch.cols[3] = new DoubleColumnVector(); + + batch.size = 4; + return batch; + } + + @Test + public void testLongColumnColumnIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongColumnLongColumn(0, 1, 2, 3); + expr.evaluate(batch); + + // get result vector + LongColumnVector r = (LongColumnVector) batch.cols[3]; + + // verify standard case + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); 
+ assertEquals(true, r.noNulls); + assertEquals(false, r.isRepeating); + + // verify when first argument (boolean flags) is repeating + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(4, r.vector[3]); + + // verify when second argument is repeating + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[1].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(-1, r.vector[2]); + assertEquals(-1, r.vector[3]); + + // verify when third argument is repeating + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[2].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(1, r.vector[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + + // test when first argument has nulls + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[0].noNulls = false; + batch.cols[0].isNull[1] = true; + batch.cols[0].isNull[2] = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(true, r.noNulls); + assertEquals(false, r.isRepeating); + + // test when second argument has nulls + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[1].noNulls = false; + batch.cols[1].isNull[1] = true; + batch.cols[1].isNull[2] = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(true, r.isNull[2]); + assertEquals(-4, r.vector[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + + // test when third argument has nulls + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[2].noNulls = false; + 
batch.cols[2].isNull[1] = true; + batch.cols[2].isNull[2] = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(true, r.isNull[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + + + // test when second argument has nulls and repeats + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[1].noNulls = false; + batch.cols[1].isNull[0] = true; + batch.cols[1].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(true, r.isNull[2]); + assertEquals(true, r.isNull[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + + // test when third argument has nulls and repeats + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[2].noNulls = false; + batch.cols[2].isNull[0] = true; + batch.cols[2].isRepeating = true; + expr.evaluate(batch); + assertEquals(true, r.isNull[0]); + assertEquals(true, r.isNull[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + } + + @Test + public void testDoubleColumnColumnIfExpr() { + // Just spot check because we already checked the logic for long. + // The code is from the same template file. 
+ + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleColumnDoubleColumn(0, 1, 2, 3); + expr.evaluate(batch); + + // get result vector + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + + // verify standard case + assertEquals(true, 1d == r.vector[0]); + assertEquals(true, 2d == r.vector[1]); + assertEquals(true, -3d == r.vector[2]); + assertEquals(true, -4d == r.vector[3]); + assertEquals(true, r.noNulls); + assertEquals(false, r.isRepeating); + } + + @Test + public void testLongColumnScalarIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongColumnLongScalar(0, 1, 100, 3); + LongColumnVector r = (LongColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(100, r.vector[0]); + assertEquals(100, r.vector[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + } + + @Test + public void testLongScalarColumnIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongScalarLongColumn(0, 100, 2, 3); + LongColumnVector r = (LongColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(100, r.vector[2]); + assertEquals(100, r.vector[3]); + } + + @Test + public void testLongScalarScalarIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongScalarLongScalar(0, 100, 200, 3); + LongColumnVector r = (LongColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(200, r.vector[0]); + assertEquals(200, r.vector[1]); + assertEquals(100, r.vector[2]); + assertEquals(100, r.vector[3]); + } + + @Test + public void testDoubleScalarScalarIfExpr() { + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleScalarDoubleScalar(0, 100.0d, 200.0d, 3); + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + expr.evaluate(batch); 
+ assertEquals(true, 200d == r.vector[0]); + assertEquals(true, 200d == r.vector[1]); + assertEquals(true, 100d == r.vector[2]); + assertEquals(true, 100d == r.vector[3]); + } + + @Test + public void testDoubleScalarColumnIfExpr() { + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleScalarDoubleColumn(0, 100.0d, 2, 3); + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(true, 1d == r.vector[0]); + assertEquals(true, 2d == r.vector[1]); + assertEquals(true, 100d == r.vector[2]); + assertEquals(true, 100d == r.vector[3]); + } + + @Test + public void testDoubleColumnScalarIfExpr() { + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleColumnDoubleScalar(0, 1, 200d, 3); + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(true, 200d == r.vector[0]); + assertEquals(true, 200d == r.vector[1]); + assertEquals(true, -3d == r.vector[2]); + assertEquals(true, -4d == r.vector[3]); + } +}