diff --git a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java index 1d3c5c4..a286024 100644 --- a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java +++ b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java @@ -386,9 +386,25 @@ // See org.apache.hadoop.hive.ql.exec.vector.expressions for remaining cast VectorExpression // classes - {"ColumnUnaryMinus", "long"}, - {"ColumnUnaryMinus", "double"}, - + {"ColumnUnaryMinus", "long"}, + {"ColumnUnaryMinus", "double"}, + + // IF conditional expression + // fileHeader, resultType, arg2Type, arg3Type + {"IfExprColumnColumn", "long"}, + {"IfExprColumnColumn", "double"}, + {"IfExprColumnScalar", "long", "long"}, + {"IfExprColumnScalar", "double", "long"}, + {"IfExprColumnScalar", "long", "double"}, + {"IfExprColumnScalar", "double", "double"}, + {"IfExprScalarColumn", "long", "long"}, + {"IfExprScalarColumn", "double", "long"}, + {"IfExprScalarColumn", "long", "double"}, + {"IfExprScalarColumn", "double", "double"}, + {"IfExprScalarScalar", "long", "long"}, + {"IfExprScalarScalar", "double", "long"}, + {"IfExprScalarScalar", "long", "double"}, + {"IfExprScalarScalar", "double", "double"}, // template, , , , , {"VectorUDAFMinMax", "VectorUDAFMinLong", "long", "<", "min", @@ -567,6 +583,14 @@ private void generate() throws Exception { generateFilterStringColumnCompareColumn(tdesc); } else if (tdesc[0].equals("StringColumnCompareColumn")) { generateStringColumnCompareColumn(tdesc); + } else if (tdesc[0].equals("IfExprColumnColumn")) { + generateIfExprColumnColumn(tdesc); + } else if (tdesc[0].equals("IfExprColumnScalar")) { + generateIfExprColumnScalar(tdesc); + } else if (tdesc[0].equals("IfExprScalarColumn")) { + generateIfExprScalarColumn(tdesc); + } else if (tdesc[0].equals("IfExprScalarScalar")) { + generateIfExprScalarScalar(tdesc); } else { continue; } @@ -800,6 +824,89 @@ private void generateColumnUnaryMinus(String[] tdesc) throws IOException { className, 
templateString); } + private void generateIfExprColumnColumn(String[] tdesc) throws IOException { + String operandType = tdesc[1]; + String inputColumnVectorType = this.getColumnVectorType(operandType); + String outputColumnVectorType = inputColumnVectorType; + String returnType = operandType; + String className = "IfExpr" + getCamelCaseType(operandType) + "Column" + + getCamelCaseType(operandType) + "Column"; + String outputFile = joinPath(this.expressionOutputDirectory, className + ".java"); + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<InputColumnVectorType>", inputColumnVectorType); + templateString = templateString.replaceAll("<OperandType>", operandType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateIfExprColumnScalar(String[] tdesc) throws IOException { + String operandType2 = tdesc[1]; + String operandType3 = tdesc[2]; + String arg2ColumnVectorType = this.getColumnVectorType(operandType2); + String returnType = getArithmeticReturnType(operandType2, operandType3); + String outputColumnVectorType = getColumnVectorType(returnType); + String className = "IfExpr" + getCamelCaseType(operandType2) + "Column" + + getCamelCaseType(operandType3) + "Scalar"; + String outputFile = joinPath(this.expressionOutputDirectory, className + ".java"); + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<Arg2ColumnVectorType>", arg2ColumnVectorType); + templateString = templateString.replaceAll("<ReturnType>", returnType); + templateString = templateString.replaceAll("<OperandType2>", 
operandType2); + templateString = templateString.replaceAll("<OperandType3>", operandType3); + templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateIfExprScalarColumn(String[] tdesc) throws IOException { + String operandType2 = tdesc[1]; + String operandType3 = tdesc[2]; + String arg3ColumnVectorType = this.getColumnVectorType(operandType3); + String returnType = getArithmeticReturnType(operandType2, operandType3); + String outputColumnVectorType = getColumnVectorType(returnType); + String className = "IfExpr" + getCamelCaseType(operandType2) + "Scalar" + + getCamelCaseType(operandType3) + "Column"; + String outputFile = joinPath(this.expressionOutputDirectory, className + ".java"); + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<Arg3ColumnVectorType>", arg3ColumnVectorType); + templateString = templateString.replaceAll("<ReturnType>", returnType); + templateString = templateString.replaceAll("<OperandType2>", operandType2); + templateString = templateString.replaceAll("<OperandType3>", operandType3); + templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateIfExprScalarScalar(String[] tdesc) throws IOException { + String operandType2 = tdesc[1]; + String operandType3 = tdesc[2]; + String arg3ColumnVectorType = this.getColumnVectorType(operandType3); + String returnType = getArithmeticReturnType(operandType2, operandType3); + String outputColumnVectorType = getColumnVectorType(returnType); + String className = "IfExpr" + getCamelCaseType(operandType2) + "Scalar" + + 
getCamelCaseType(operandType3) + "Scalar"; + String outputFile = joinPath(this.expressionOutputDirectory, className + ".java"); + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<ReturnType>", returnType); + templateString = templateString.replaceAll("<OperandType2>", operandType2); + templateString = templateString.replaceAll("<OperandType3>", operandType3); + templateString = templateString.replaceAll("<OutputColumnVectorType>", outputColumnVectorType); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + // template, , , , , , private void generateColumnUnaryFunc(String[] tdesc) throws IOException { String classNamePrefix = tdesc[1]; diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt new file mode 100644 index 0000000..eebf71b --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnColumn.txt @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are long columns or long expression results. + */ +public class <ClassName> extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column, arg3Column; + private int outputColumn; + + public <ClassName>(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public <ClassName>() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + <InputColumnVectorType> arg2ColVector = (<InputColumnVectorType>) batch.cols[arg2Column]; + <InputColumnVectorType> arg3ColVector = (<InputColumnVectorType>) batch.cols[arg3Column]; + <InputColumnVectorType> outputColVector = (<InputColumnVectorType>) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + <OperandType>[] vector2 = arg2ColVector.vector; + <OperandType>[] vector3 = arg3ColVector.vector; + <OperandType>[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + /* All the code paths 
below propagate nulls even if neither arg2 nor arg3 + * have nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? vector2[i] : vector3[i]); + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + vector2[i] : vector3[i]); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + vector2[i] : vector3[i]); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "<OperandType>"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg2Column() { + return arg2Column; + } + + public void setArg2Column(int colNum) { + this.arg2Column = colNum; + } + + public int getArg3Column() { + return arg3Column; + } + + public void setArg3Column(int colNum) { + this.arg3Column = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType>"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType>")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git a/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt new file mode 100644 index 0000000..bff4e9e --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprColumnScalar.txt @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a column or non-constant expression result. + * The third is a constant value. 
+ */ +public class <ClassName> extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column; + private <OperandType3> arg3Scalar; + private int outputColumn; + + public <ClassName>(int arg1Column, int arg2Column, <OperandType3> arg3Scalar, + int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public <ClassName>() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + <Arg2ColumnVectorType> arg2ColVector = (<Arg2ColumnVectorType>) batch.cols[arg2Column]; + <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls; // nulls can only come from arg2 + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + <OperandType2>[] vector2 = arg2ColVector.vector; + <ReturnType>[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + // Extend any repeating values and noNulls indicator in the inputs to + // reduce the number of code paths needed below. + arg2ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? vector2[i] : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? 
vector2[i] : arg3Scalar); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + vector2[i] : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + vector2[i] : arg3Scalar); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } + } + + // restore repeating and no nulls indicators + arg2ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "<ReturnType>"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg2Column() { + return arg2Column; + } + + public void setArg2Column(int colNum) { + this.arg2Column = colNum; + } + + public <OperandType3> getArg3Scalar() { + return arg3Scalar; + } + + public void setArg3Scalar(<OperandType3> value) { + this.arg3Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt new file mode 100644 index 0000000..701e88b --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarColumn.txt @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NullUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a column or non-constant expression result. 
+ */ +public class <ClassName> extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg3Column; + private <OperandType2> arg2Scalar; + private int outputColumn; + + public <ClassName>(int arg1Column, <OperandType2> arg2Scalar, int arg3Column, + int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public <ClassName>() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + <Arg3ColumnVectorType> arg3ColVector = (<Arg3ColumnVectorType>) batch.cols[arg3Column]; + <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg3ColVector.noNulls; // nulls can only come from arg3 column vector + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + <OperandType3>[] vector3 = arg3ColVector.vector; + <ReturnType>[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // Extend any repeating values and noNulls indicator in the inputs to + // reduce the number of code paths needed below. + // This could be optimized in the future by having separate paths + // for when arg3ColVector is repeating or has no nulls. + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : vector3[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? 
arg2Scalar : vector3[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : vector3[i]); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : vector3[i]); + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } + } + + // restore repeating and no nulls indicators + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "<ReturnType>"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg3Column() { + return arg3Column; + } + + public void setArg3Column(int colNum) { + this.arg3Column = colNum; + } + + public <OperandType2> getArg2Scalar() { + return arg2Scalar; + } + + public void setArg2Scalar(<OperandType2> value) { + this.arg2Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt new file mode 100644 index 0000000..c211617 --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/IfExprScalarScalar.txt @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import java.util.Arrays; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a constant value. + * The third is a constant value. 
+ */ +public class <ClassName> extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column; + private <OperandType2> arg2Scalar; + private <OperandType3> arg3Scalar; + private int outputColumn; + + public <ClassName>(int arg1Column, <OperandType2> arg2Scalar, <OperandType3> arg3Scalar, + int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public <ClassName>() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + <OutputColumnVectorType> outputColVector = (<OutputColumnVectorType>) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = false; // output is a scalar which we know is non null + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + <ReturnType>[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + outputColVector.fill(arg3Scalar); + } + } else if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (vector1[i] == 1 ? arg2Scalar : arg3Scalar); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2Scalar : arg3Scalar); + outputIsNull[i] = false; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2Scalar : arg3Scalar); + } + Arrays.fill(outputIsNull, 0, n, false); + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "<ReturnType>"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public <OperandType2> getArg2Scalar() { + return arg2Scalar; + } + + public void setArg2Scalar(<OperandType2> value) { + this.arg2Scalar = value; + } + + public <OperandType3> getArg3Scalar() { + return arg3Scalar; + } + + public void setArg3Scalar(<OperandType3> value) { + this.arg3Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType2>"), + VectorExpressionDescriptor.ArgumentType.getType("<OperandType3>")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index e1d4543..f1eef14 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -219,4 +221,93 @@ public Writable getWritableObject(int index) { } return result; } + + /** Copy the current object 
contents into the output. Only copy selected entries, + * as indicated by selectedInUse and the sel array. + */ + public void copySelected( + boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.setVal(0, vector[0], start[0], length[0]); + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.setVal(i, vector[i], start[i], length[i]); + } + } + else { + for (int i = 0; i < size; i++) { + output.setVal(i, vector[i], start[i], length[i]); + } + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + /** Simplify vector by brute-force flattening noNulls and isRepeating + * This can be used to reduce combinatorial explosion of code paths in VectorExpressions + * with many arguments, at the expense of loss of some performance. + */ + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + + // setRef is used below and this is safe, because the reference + // is to data owned by this column vector. If this column vector + // gets re-used, the whole thing is re-used together so there + // is no danger of a dangling reference. + + // Only copy data values if entry is not null. The string value + // at position 0 is undefined if the position 0 value is null. 
+ if (noNulls || (!noNulls && !isNull[0])) { + + // loops start at position 1 because position 0 is already set + if (selectedInUse) { + for (int j = 1; j < size; j++) { + int i = sel[j]; + this.setRef(i, vector[0], start[0], length[0]); + } + } else { + for (int i = 1; i < size; i++) { + this.setRef(i, vector[0], start[0], length[0]); + } + } + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + // Fill the all the vector entries with provided value + public void fill(byte[] value) { + noNulls = true; + isRepeating = true; + setRef(0, value, 0, value.length); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 48b87ea..0a8811f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -49,6 +49,11 @@ * If so, vector[0] holds the repeating value. */ public boolean isRepeating; + + // Variables to hold state from before flattening so it can be easily restored. + private boolean preFlattenIsRepeating; + private boolean preFlattenNoNulls; + public abstract Writable getWritableObject(int index); /** @@ -76,5 +81,66 @@ public void reset() { noNulls = true; isRepeating = false; } + + abstract public void flatten(boolean selectedInUse, int[] sel, int size); + + // Simplify vector by brute-force flattening noNulls if isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. 
+ public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) { + + boolean nullFillValue; + + if (noNulls) { + nullFillValue = false; + } else { + nullFillValue = isNull[0]; + } + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = nullFillValue; + } + } else { + Arrays.fill(isNull, 0, size, nullFillValue); + } + + // all nulls are now explicit + noNulls = false; + } + + public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) { + if (noNulls) { + noNulls = false; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = false; + } + } else { + Arrays.fill(isNull, 0, size, false); + } + } + } + + /** + * Restore the state of isRepeating and noNulls to what it was + * before flattening. This must only be called just after flattening + * and then evaluating a VectorExpression on the column vector. + * It is an optimization that allows other operations on the same + * column to continue to benefit from the isRepeating and noNulls + * indicators. + */ + public void unFlatten() { + isRepeating = preFlattenIsRepeating; + noNulls = preFlattenNoNulls; + } + + // Record repeating and no nulls state to be restored later. 
+ protected void flattenPush() { + preFlattenIsRepeating = isRepeating; + preFlattenNoNulls = noNulls; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index d3bb28e..cb23129 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; @@ -67,4 +69,76 @@ public Writable getWritableObject(int index) { return writableObj; } } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + + // Output has nulls if and only if input has nulls. 
+ output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.vector[0] = vector[0]; + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } + else { + System.arraycopy(vector, 0, output.vector, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + // Fill the column vector with the provided value + public void fill(double value) { + noNulls = true; + isRepeating = true; + vector[0] = value; + } + + // Simplify vector by brute-force flattening noNulls and isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. 
+ public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + double repeatVal = vector[0]; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + vector[i] = repeatVal; + } + } else { + Arrays.fill(vector, 0, size, repeatVal); + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index f65e8fa..aa05b19 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.Arrays; + import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; @@ -67,4 +69,120 @@ public Writable getWritableObject(int index) { return writableObj; } } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + + // Output has nulls if and only if input has nulls. 
+ output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.vector[0] = vector[0]; + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } + else { + System.arraycopy(vector, 0, output.vector, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.vector[0] = vector[0]; // automatic conversion to double is done here + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } + else { + System.arraycopy(vector, 0, output.vector, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + // Fill the column vector with the provided value + public void fill(long value) { + noNulls = true; + isRepeating = true; + vector[0] = value; + } + + // Simplify vector by brute-force flattening noNulls and isRepeating + // This can be used to reduce 
combinatorial explosion of code paths in VectorExpressions + // with many arguments. + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + long repeatVal = vector[0]; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + vector[i] = repeatVal; + } + } else { + Arrays.fill(vector, 0, size, repeatVal); + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java new file mode 100644 index 0000000..c321ad0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java @@ -0,0 +1,205 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are string columns or string expression results. + */ +public class IfExprStringColumnStringColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column, arg3Column; + private int outputColumn; + + public IfExprStringColumnStringColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprStringColumnStringColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; + BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths 
below propagate nulls even if neither arg2 nor arg3 + * have nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg2Column() { + return arg2Column; + } + + public void setArg2Column(int colNum) { + this.arg2Column = colNum; + } + + public int getArg3Column() { + return arg3Column; + } + + public void setArg3Column(int colNum) { + this.arg3Column = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + 
VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java new file mode 100644 index 0000000..33bdb45 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string column expression. + * The third is a string scalar. + */ +public class IfExprStringColumnStringScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column; + private byte[] arg3Scalar; + private int outputColumn; + + public IfExprStringColumnStringScalar(int arg1Column, int arg2Column, byte[] arg3Scalar, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprStringColumnStringScalar() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even if arg2 has no nulls. 
+ * This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. + */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } + } + + // restore state of repeating and non nulls indicators + arg2ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public int getArg2Column() { + return arg2Column; + } + + public void setArg2Column(int colNum) { + this.arg2Column = colNum; + } + + public byte[] getArg3Scalar() { + return arg3Scalar; + } + + public void setArg3Scalar(byte[] value) { + this.arg3Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java new file mode 100644 index 0000000..5190884 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string column or non-constant expression result. 
+ */ +public class IfExprStringScalarStringColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg3Column; + private byte[] arg2Scalar; + private int outputColumn; + + public IfExprStringScalarStringColumn(int arg1Column, byte[] arg2Scalar, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprStringScalarStringColumn() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even arg3 has no + * nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. 
+ */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the input + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ false : arg3ColVector.isNull[i]); + } + } + } + + // restore state of repeating and non nulls indicators + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public byte[] getArg2Scalar() { + return arg2Scalar; + } + + public void setArg2Scalar(byte[] value) { + this.arg2Scalar = value; + } + + public int getArg3Column() { + return arg3Column; + } + + public void setArg3Column(int colNum) { + this.arg3Column = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java new file mode 100644 index 0000000..f6fcfea --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string scalar. 
+ */ +public class IfExprStringScalarStringScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column; + private byte[] arg2Scalar; + private byte[] arg3Scalar; + private int outputColumn; + + public IfExprStringScalarStringScalar( + int arg1Column, byte[] arg2Scalar, byte[] arg3Scalar, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprStringScalarStringScalar() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + outputColVector.noNulls = true; // output must be a scalar and neither one is null + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if
(!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + public int getArg1Column() { + return arg1Column; + } + + public void setArg1Column(int colNum) { + this.arg1Column = colNum; + } + + public byte[] getArg2Scalar() { + return arg2Scalar; + } + + public void setArg2Scalar(byte[] value) { + this.arg2Scalar = value; + } + + public byte[] getArg3Scalar() { + return arg3Scalar; + } + + public void setArg3Scalar(byte[] value) { + this.arg3Scalar = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType("long"), + VectorExpressionDescriptor.ArgumentType.getType("string"), + VectorExpressionDescriptor.ArgumentType.getType("string")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 5c7617e..7392a9e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -126,6 +126,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCeil; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFFloor; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; @@ -261,6 +262,9 @@ public Vectorizer() { supportedGenericUDFs.add(UDFToString.class); supportedGenericUDFs.add(GenericUDFTimestamp.class); + // For conditional expressions + supportedGenericUDFs.add(GenericUDFIf.class); + supportedAggregationUdfs.add("min"); supportedAggregationUdfs.add("max"); supportedAggregationUdfs.add("count"); @@ -347,17 +351,17 @@ private void vectorizeMRTask(MapRedTask mrTask) throws SemanticException { topNodes.addAll(mapWork.getAliasToWork().values()); HashMap nodeOutput = new HashMap(); ogw.startWalking(topNodes, nodeOutput); - + Map> columnVectorTypes = vnp.getScratchColumnVectorTypes(); mapWork.setScratchColumnVectorTypes(columnVectorTypes); Map> columnMap = vnp.getScratchColumnMap(); mapWork.setScratchColumnMap(columnMap); - + if (LOG.isDebugEnabled()) { LOG.debug(String.format("vectorTypes: %s", columnVectorTypes.toString())); LOG.debug(String.format("columnMap: %s", columnMap.toString())); } - + return; } } @@ -426,9 +430,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { Operator op = (Operator) nd; - - VectorizationContext vContext = null; - + + VectorizationContext vContext = null; + if (op instanceof TableScanOperator) { vContext = getVectorizationContext(op, physicalContext); for (String onefile : mWork.getPathToAliases().keySet()) { @@ -458,9 +462,9 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, --i; } } - + assert vContext != null; - + if (op.getType().equals(OperatorType.REDUCESINK) && op.getParentOperators().get(0).getType().equals(OperatorType.GROUPBY)) { // No need to vectorize diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 0c7e61c..adf55c8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -21,11 +21,30 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; /** * IF(expr1,expr2,expr3)
@@ -33,6 +52,17 @@ * otherwise it returns expr3. IF() returns a numeric or string value, depending * on the context in which it is used. */ +@VectorizedExpressions({ + IfExprLongColumnLongColumn.class, IfExprDoubleColumnDoubleColumn.class, + IfExprLongColumnLongScalar.class, IfExprDoubleColumnDoubleScalar.class, + IfExprLongColumnDoubleScalar.class, IfExprDoubleColumnLongScalar.class, + IfExprLongScalarLongColumn.class, IfExprDoubleScalarDoubleColumn.class, + IfExprLongScalarDoubleColumn.class, IfExprDoubleScalarLongColumn.class, + IfExprLongScalarLongScalar.class, IfExprDoubleScalarDoubleScalar.class, + IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class, + IfExprStringColumnStringColumn.class, IfExprStringColumnStringScalar.class, + IfExprStringScalarStringColumn.class, IfExprStringScalarStringScalar.class +}) public class GenericUDFIf extends GenericUDF { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; @@ -94,5 +124,4 @@ public String getDisplayString(String[] children) { sb.append(children[2]).append(")"); return sb.toString(); } - } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 720ca54..eff251f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -37,6 +37,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList; @@ -56,6 +60,15 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterLongColumnInList; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterDoubleColumnInList; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColUnaryMinus; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColLessDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween; @@ -93,6 +106,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; +import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; @@ -111,6 +125,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFRound; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; @@ -1004,4 +1019,177 @@ public void testInFiltersAndExprs() throws HiveException { ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.PROJECTION); assertTrue(ve instanceof DoubleColumnInList); } + + /** + * Test that correct VectorExpression classes are chosen for the + * IF (expr1, expr2, expr3) conditional expression for integer, float, + * boolean, timestamp and string input types. expr1 is always an input column expression + * of type long. expr2 and expr3 can be column expressions or constants of other types + * but must have the same type. 
+ */ + @Test + public void testIfConditionalExprs() throws HiveException { + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); + ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Long.class, "col2", "table", false); + ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Long.class, "col3", "table", false); + + ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc(new Integer(1)); + ExprNodeConstantDesc constDesc3 = new ExprNodeConstantDesc(new Integer(2)); + + // long column/column IF + GenericUDFIf udf = new GenericUDFIf(); + ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + List children1 = new ArrayList(); + children1.add(col1Expr); + children1.add(col2Expr); + children1.add(col3Expr); + exprDesc.setChildren(children1); + + Map columnMap = new HashMap(); + columnMap.put("col1", 1); + columnMap.put("col2", 2); + columnMap.put("col3", 3); + VectorizationContext vc = new VectorizationContext(columnMap, 3); + VectorExpression ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongColumn); + + // long column/scalar IF + children1.set(2, new ExprNodeConstantDesc(1L)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongScalar); + + // long scalar/scalar IF + children1.set(1, new ExprNodeConstantDesc(1L)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongScalarLongScalar); + + // long scalar/column IF + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongScalarLongColumn); + + // test for double type + col2Expr = new ExprNodeColumnDesc(Double.class, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(Double.class, "col3", "table", false); + + // double column/column IF + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof 
IfExprDoubleColumnDoubleColumn); + + // double column/scalar IF + children1.set(2, new ExprNodeConstantDesc(1D)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleColumnDoubleScalar); + + // double scalar/scalar IF + children1.set(1, new ExprNodeConstantDesc(1D)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleScalarDoubleScalar); + + // double scalar/column IF + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleScalarDoubleColumn); + + // double scalar/long column IF + children1.set(2, new ExprNodeColumnDesc(Long.class, "col3", "table", false)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprDoubleScalarLongColumn); + + // Additional combinations of (long,double)X(column,scalar) for each of the second + // and third arguments are omitted. We have coverage of all the source templates + // already. + + // test for timestamp type + col2Expr = new ExprNodeColumnDesc(Timestamp.class, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(Timestamp.class, "col3", "table", false); + + // timestamp column/column IF + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongColumn); + + // timestamp column/scalar IF where scalar is really a CAST of a constant to timestamp. 
+ ExprNodeGenericFuncDesc f = new ExprNodeGenericFuncDesc(); + f.setGenericUDF(new GenericUDFTimestamp()); + f.setTypeInfo(TypeInfoFactory.timestampTypeInfo); + List children2 = new ArrayList(); + f.setChildren(children2); + children2.add(new ExprNodeConstantDesc("2013-11-05 00:00:00.000")); + children1.set(2, f); + ve = vc.getVectorExpression(exprDesc); + + // We check for two different classes below because initially the result + // is IfExprLongColumnLongColumn but in the future if the system is enhanced + // with constant folding then the result will be IfExprLongColumnLongScalar. + assertTrue(IfExprLongColumnLongColumn.class == ve.getClass() + || IfExprLongColumnLongScalar.class == ve.getClass()); + + // timestamp scalar/scalar + children1.set(1, f); + ve = vc.getVectorExpression(exprDesc); + assertTrue(IfExprLongColumnLongColumn.class == ve.getClass() + || IfExprLongScalarLongScalar.class == ve.getClass()); + + // timestamp scalar/column + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(IfExprLongColumnLongColumn.class == ve.getClass() + || IfExprLongScalarLongColumn.class == ve.getClass()); + + // test for boolean type + col2Expr = new ExprNodeColumnDesc(Boolean.class, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(Boolean.class, "col3", "table", false); + + // column/column + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongColumn); + + // column/scalar IF + children1.set(2, new ExprNodeConstantDesc(true)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongColumnLongScalar); + + // scalar/scalar IF + children1.set(1, new ExprNodeConstantDesc(true)); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongScalarLongScalar); + + // scalar/column IF + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprLongScalarLongColumn); + + // test for string type + 
constDesc2 = new ExprNodeConstantDesc("Alpha"); + constDesc3 = new ExprNodeConstantDesc("Bravo"); + col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(String.class, "col3", "table", false); + + // column/column + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringColumnStringColumn); + + // column/scalar + children1.set(2, constDesc3); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringColumnStringScalar); + + // scalar/scalar + children1.set(1, constDesc2); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringScalarStringScalar); + + // scalar/column + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringScalarStringColumn); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java index a250c9d..78cd5cd 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java @@ -27,11 +27,11 @@ * Test creation and basic manipulation of VectorizedRowBatch. */ public class TestVectorizedRowBatch { - + // test fields static final String[] COLORS = {"red", "yellow", "green", "blue", "violet", "orange"}; private static byte[][] colorsBytes; - + private VectorizedRowBatch makeBatch() { VectorizedRowBatch batch = new VectorizedRowBatch(3); LongColumnVector lv = new LongColumnVector(); @@ -44,29 +44,29 @@ private VectorizedRowBatch makeBatch() { addRandomNulls(batch); return batch; } - + @Test /** - * Make sure you can create a batch and that all columns are the + * Make sure you can create a batch and that all columns are the * default size. 
*/ public void testVectorizedRowBatchCreate() { VectorizedRowBatch batch = makeBatch(); Assert.assertEquals(3, batch.numCols); Assert.assertEquals(VectorizedRowBatch.DEFAULT_SIZE, batch.size); - Assert.assertEquals(((LongColumnVector) batch.cols[0]).vector.length, + Assert.assertEquals(((LongColumnVector) batch.cols[0]).vector.length, + VectorizedRowBatch.DEFAULT_SIZE); + Assert.assertEquals(((DoubleColumnVector) batch.cols[1]).vector.length, VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertEquals(((DoubleColumnVector) batch.cols[1]).vector.length, - VectorizedRowBatch.DEFAULT_SIZE); - Assert.assertEquals(((BytesColumnVector) batch.cols[2]).vector.length, + Assert.assertEquals(((BytesColumnVector) batch.cols[2]).vector.length, VectorizedRowBatch.DEFAULT_SIZE); } - + /* * Test routines to exercise VectorizedRowBatch * by filling column vectors with data and null values. */ - + public static void setRandom(VectorizedRowBatch batch) { batch.size = VectorizedRowBatch.DEFAULT_SIZE; for (int i = 0; i != batch.numCols; i++) { @@ -84,24 +84,24 @@ public static void setSample(VectorizedRowBatch batch) { /** * Set to sample data, re-using existing columns in batch. - * + * * @param batch */ public static void setSampleOverwrite(VectorizedRowBatch batch) { - + // Put sample data in the columns. for (int i = 0; i != batch.numCols; i++) { setSampleLongCol((LongColumnVector) batch.cols[i]); } - + // Reset the selection vector. batch.selectedInUse = false; batch.size = VectorizedRowBatch.DEFAULT_SIZE; } - + /** * Sprinkle null values in this column vector. - * + * * @param col */ public static void addRandomNulls(ColumnVector col) { @@ -111,10 +111,10 @@ public static void addRandomNulls(ColumnVector col) { col.isNull[i] = Math.abs(rand.nextInt() % 11) == 0; } } - + /** * Add null values, but do it faster, by avoiding use of Random(). 
- * + * * @param col */ public void addSampleNulls(ColumnVector col) { @@ -136,20 +136,20 @@ public void addSampleNulls(VectorizedRowBatch batch) { addSampleNulls(batch.cols[i]); } } - + /** * Set vector elements to sample string data from colorsBytes string table. * @param col */ - public static void setSampleStringCol(BytesColumnVector col) { + public static void setSampleStringCol(BytesColumnVector col) { initColors(); int size = col.vector.length; for(int i = 0; i != size; i++) { int pos = i % colorsBytes.length; col.setRef(i, colorsBytes[pos], 0, colorsBytes[pos].length); - } + } } - + /* * Initialize string table in a lazy fashion. */ @@ -161,7 +161,7 @@ private static void initColors() { } } } - + /** * Set the vector to sample data that repeats an iteration from 0 to 99. @@ -190,7 +190,7 @@ public static void setRepeatingLongCol(LongColumnVector col) { col.isRepeating = true; col.vector[0] = 50; } - + /** * Set the vector to sample data that repeats an iteration from 0 to 99. * @param col @@ -218,4 +218,64 @@ public static void setRepeatingDoubleCol(DoubleColumnVector col) { col.isRepeating = true; col.vector[0] = 50.0; } + + @Test + public void testFlatten() { + verifyFlatten(new LongColumnVector()); + verifyFlatten(new DoubleColumnVector()); + verifyFlatten(new BytesColumnVector()); + } + + private void verifyFlatten(ColumnVector v) { + + // verify that flattening and unflattenting no-nulls works + v.noNulls = true; + v.isNull[1] = true; + int[] sel = {0, 2}; + int size = 2; + v.flatten(true, sel, size); + Assert.assertFalse(v.noNulls); + Assert.assertFalse(v.isNull[0] || v.isNull[2]); + v.unFlatten(); + Assert.assertTrue(v.noNulls); + + // verify that flattening and unflattening "isRepeating" works + v.isRepeating = true; + v.noNulls = false; + v.isNull[0] = true; + v.flatten(true, sel, 2); + Assert.assertFalse(v.noNulls); + Assert.assertTrue(v.isNull[0] && v.isNull[2]); + Assert.assertFalse(v.isRepeating); + v.unFlatten(); + 
Assert.assertFalse(v.noNulls); + Assert.assertTrue(v.isRepeating); + + // verify extension of values in the array + v.noNulls = true; + if (v instanceof LongColumnVector) { + ((LongColumnVector) v).vector[0] = 100; + v.flatten(true, sel, 2); + Assert.assertTrue(((LongColumnVector) v).vector[2] == 100); + } else if (v instanceof DoubleColumnVector) { + ((DoubleColumnVector) v).vector[0] = 200d; + v.flatten(true, sel, 2); + Assert.assertTrue(((DoubleColumnVector) v).vector[2] == 200d); + } else if (v instanceof BytesColumnVector) { + BytesColumnVector bv = (BytesColumnVector) v; + byte[] b = null; + try { + b = "foo".getBytes("UTF-8"); + } catch (Exception e) { + ; // eat it + } + bv.setRef(0, b, 0, b.length); + bv.flatten(true, sel, 2); + Assert.assertEquals(bv.vector[0], bv.vector[2]); + Assert.assertEquals(bv.start[0], bv.start[2]); + Assert.assertEquals(bv.length[0], bv.length[2]); + } + } + + } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java new file mode 100644 index 0000000..139a5aa --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -0,0 +1,517 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import static org.junit.Assert.*; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongColumnLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarLongScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; + +import org.junit.Test; + +/** + * Test vectorized conditional expression handling. 
+ */ +public class TestVectorConditionalExpressions { + + private VectorizedRowBatch getBatch4LongVectors() { + VectorizedRowBatch batch = new VectorizedRowBatch(4); + LongColumnVector v = new LongColumnVector(); + + // set first argument to IF -- boolean flag + v.vector[0] = 0; + v.vector[1] = 0; + v.vector[2] = 1; + v.vector[3] = 1; + batch.cols[0] = v; + + // set second argument to IF + v = new LongColumnVector(); + v.vector[0] = -1; + v.vector[1] = -2; + v.vector[2] = -3; + v.vector[3] = -4; + batch.cols[1] = v; + + // set third argument to IF + v = new LongColumnVector(); + v.vector[0] = 1; + v.vector[1] = 2; + v.vector[2] = 3; + v.vector[3] = 4; + batch.cols[2] = v; + + // set output column + batch.cols[3] = new LongColumnVector(); + + batch.size = 4; + return batch; + } + + private VectorizedRowBatch getBatch1Long3DoubleVectors() { + VectorizedRowBatch batch = new VectorizedRowBatch(4); + LongColumnVector lv = new LongColumnVector(); + + // set first argument to IF -- boolean flag + lv.vector[0] = 0; + lv.vector[1] = 0; + lv.vector[2] = 1; + lv.vector[3] = 1; + batch.cols[0] = lv; + + // set second argument to IF + DoubleColumnVector v = new DoubleColumnVector(); + v.vector[0] = -1; + v.vector[1] = -2; + v.vector[2] = -3; + v.vector[3] = -4; + batch.cols[1] = v; + + // set third argument to IF + v = new DoubleColumnVector(); + v.vector[0] = 1; + v.vector[1] = 2; + v.vector[2] = 3; + v.vector[3] = 4; + batch.cols[2] = v; + + // set output column + batch.cols[3] = new DoubleColumnVector(); + + batch.size = 4; + return batch; + } + + private VectorizedRowBatch getBatch1Long3BytesVectors() { + VectorizedRowBatch batch = new VectorizedRowBatch(4); + LongColumnVector lv = new LongColumnVector(); + + // set first argument to IF -- boolean flag + lv.vector[0] = 0; + lv.vector[1] = 0; + lv.vector[2] = 1; + lv.vector[3] = 1; + batch.cols[0] = lv; + + // set second argument to IF + BytesColumnVector v = new BytesColumnVector(); + v.initBuffer(); + setString(v, 0, 
"arg2_0"); + setString(v, 1, "arg2_1"); + setString(v, 2, "arg2_2"); + setString(v, 3, "arg2_3"); + + batch.cols[1] = v; + + // set third argument to IF + v = new BytesColumnVector(); + v.initBuffer(); + setString(v, 0, "arg3_0"); + setString(v, 1, "arg3_1"); + setString(v, 2, "arg3_2"); + setString(v, 3, "arg3_3"); + batch.cols[2] = v; + + // set output column + v = new BytesColumnVector(); + v.initBuffer(); + batch.cols[3] = v; + batch.size = 4; + return batch; + } + + private void setString(BytesColumnVector v, int i, String s) { + byte[] b = getUTF8Bytes(s); + v.setVal(i, b, 0, b.length); + } + + private byte[] getUTF8Bytes(String s) { + byte[] b = null; + try { + b = s.getBytes("UTF-8"); + } catch (Exception e) { + ; // eat it + } + return b; + } + + private String getString(BytesColumnVector v, int i) { + String s = null; + try { + s = new String(v.vector[i], v.start[i], v.length[i], "UTF-8"); + } catch (Exception e) { + ; // eat it + } + return s; + } + + @Test + public void testLongColumnColumnIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongColumnLongColumn(0, 1, 2, 3); + expr.evaluate(batch); + + // get result vector + LongColumnVector r = (LongColumnVector) batch.cols[3]; + + // verify standard case + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(true, r.noNulls); + assertEquals(false, r.isRepeating); + + // verify when first argument (boolean flags) is repeating + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(4, r.vector[3]); + + // verify when second argument is repeating + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[1].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + 
assertEquals(-1, r.vector[2]); + assertEquals(-1, r.vector[3]); + + // verify when third argument is repeating + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[2].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(1, r.vector[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + + // test when first argument has nulls + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[0].noNulls = false; + batch.cols[0].isNull[1] = true; + batch.cols[0].isNull[2] = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(true, r.noNulls); + assertEquals(false, r.isRepeating); + + // test when second argument has nulls + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[1].noNulls = false; + batch.cols[1].isNull[1] = true; + batch.cols[1].isNull[2] = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(true, r.isNull[2]); + assertEquals(-4, r.vector[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + + // test when third argument has nulls + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[2].noNulls = false; + batch.cols[2].isNull[1] = true; + batch.cols[2].isNull[2] = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + assertEquals(true, r.isNull[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + + + // test when second argument has nulls and repeats + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[1].noNulls = false; + batch.cols[1].isNull[0] = true; + batch.cols[1].isRepeating = true; + expr.evaluate(batch); + assertEquals(1, r.vector[0]); + 
assertEquals(2, r.vector[1]); + assertEquals(true, r.isNull[2]); + assertEquals(true, r.isNull[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + + // test when third argument has nulls and repeats + batch = getBatch4LongVectors(); + r = (LongColumnVector) batch.cols[3]; + batch.cols[2].noNulls = false; + batch.cols[2].isNull[0] = true; + batch.cols[2].isRepeating = true; + expr.evaluate(batch); + assertEquals(true, r.isNull[0]); + assertEquals(true, r.isNull[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + assertEquals(false, r.noNulls); + assertEquals(false, r.isRepeating); + } + + @Test + public void testDoubleColumnColumnIfExpr() { + // Just spot check because we already checked the logic for long. + // The code is from the same template file. + + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleColumnDoubleColumn(0, 1, 2, 3); + expr.evaluate(batch); + + // get result vector + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + + // verify standard case + assertEquals(true, 1d == r.vector[0]); + assertEquals(true, 2d == r.vector[1]); + assertEquals(true, -3d == r.vector[2]); + assertEquals(true, -4d == r.vector[3]); + assertEquals(true, r.noNulls); + assertEquals(false, r.isRepeating); + } + + @Test + public void testLongColumnScalarIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongColumnLongScalar(0, 1, 100, 3); + LongColumnVector r = (LongColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(100, r.vector[0]); + assertEquals(100, r.vector[1]); + assertEquals(-3, r.vector[2]); + assertEquals(-4, r.vector[3]); + } + + @Test + public void testLongScalarColumnIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongScalarLongColumn(0, 100, 2, 3); + LongColumnVector r = (LongColumnVector) batch.cols[3]; + expr.evaluate(batch); + 
assertEquals(1, r.vector[0]); + assertEquals(2, r.vector[1]); + assertEquals(100, r.vector[2]); + assertEquals(100, r.vector[3]); + } + + @Test + public void testLongScalarScalarIfExpr() { + VectorizedRowBatch batch = getBatch4LongVectors(); + VectorExpression expr = new IfExprLongScalarLongScalar(0, 100, 200, 3); + LongColumnVector r = (LongColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(200, r.vector[0]); + assertEquals(200, r.vector[1]); + assertEquals(100, r.vector[2]); + assertEquals(100, r.vector[3]); + } + + @Test + public void testDoubleScalarScalarIfExpr() { + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleScalarDoubleScalar(0, 100.0d, 200.0d, 3); + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(true, 200d == r.vector[0]); + assertEquals(true, 200d == r.vector[1]); + assertEquals(true, 100d == r.vector[2]); + assertEquals(true, 100d == r.vector[3]); + } + + @Test + public void testDoubleScalarColumnIfExpr() { + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleScalarDoubleColumn(0, 100.0d, 2, 3); + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(true, 1d == r.vector[0]); + assertEquals(true, 2d == r.vector[1]); + assertEquals(true, 100d == r.vector[2]); + assertEquals(true, 100d == r.vector[3]); + } + + @Test + public void testDoubleColumnScalarIfExpr() { + VectorizedRowBatch batch = getBatch1Long3DoubleVectors(); + VectorExpression expr = new IfExprDoubleColumnDoubleScalar(0, 1, 200d, 3); + DoubleColumnVector r = (DoubleColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertEquals(true, 200d == r.vector[0]); + assertEquals(true, 200d == r.vector[1]); + assertEquals(true, -3d == r.vector[2]); + assertEquals(true, -4d == r.vector[3]); + } + + @Test + public void testIfExprStringColumnStringColumn() { + VectorizedRowBatch batch = 
getBatch1Long3BytesVectors(); + VectorExpression expr = new IfExprStringColumnStringColumn(0, 1, 2, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + + // test first IF argument repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[0].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg3_2")); + assertTrue(getString(r, 3).equals("arg3_3")); + + // test second IF argument repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[1].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg2_0")); + assertTrue(getString(r, 3).equals("arg2_0")); + + // test third IF argument repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[2].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_0")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + + // test second IF argument with nulls + batch = getBatch1Long3BytesVectors(); + batch.cols[1].noNulls = false; + batch.cols[1].isNull[2] = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(!r.noNulls && r.isNull[2]); + assertTrue(getString(r, 3).equals("arg2_3")); + assertFalse(r.isNull[0] || r.isNull[1] || r.isNull[3]); + + // test third IF argument with nulls + 
batch = getBatch1Long3BytesVectors(); + batch.cols[2].noNulls = false; + batch.cols[2].isNull[0] = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(!r.noNulls && r.isNull[0]); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + assertFalse(r.isNull[1] || r.isNull[2] || r.isNull[3]); + + // test second IF argument with nulls and repeating + batch = getBatch1Long3BytesVectors(); + batch.cols[1].noNulls = false; + batch.cols[1].isNull[0] = true; + batch.cols[1].isRepeating = true; + r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(!r.noNulls && r.isNull[2]); + assertTrue(!r.noNulls && r.isNull[3]); + assertFalse(r.isNull[0] || r.isNull[1]); + } + + @Test + public void testIfExprStringColumnStringScalar() { + VectorizedRowBatch batch = getBatch1Long3BytesVectors(); + byte[] scalar = getUTF8Bytes("scalar"); + VectorExpression expr = new IfExprStringColumnStringScalar(0, 1, scalar, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("scalar")); + assertTrue(getString(r, 1).equals("scalar")); + assertTrue(getString(r, 2).equals("arg2_2")); + assertTrue(getString(r, 3).equals("arg2_3")); + } + + @Test + public void testIfExprStringScalarStringColumn() { + VectorizedRowBatch batch = getBatch1Long3BytesVectors(); + byte[] scalar = getUTF8Bytes("scalar"); + VectorExpression expr = new IfExprStringScalarStringColumn(0,scalar, 2, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("arg3_0")); + assertTrue(getString(r, 1).equals("arg3_1")); + assertTrue(getString(r, 2).equals("scalar")); + assertTrue(getString(r, 3).equals("scalar")); + } + + @Test + public void 
testIfExprStringScalarStringScalar() { + + // standard case + VectorizedRowBatch batch = getBatch1Long3BytesVectors(); + byte[] scalar1 = getUTF8Bytes("scalar1"); + byte[] scalar2 = getUTF8Bytes("scalar2"); + VectorExpression expr = new IfExprStringScalarStringScalar(0,scalar1, scalar2, 3); + BytesColumnVector r = (BytesColumnVector) batch.cols[3]; + expr.evaluate(batch); + assertTrue(getString(r, 0).equals("scalar2")); + assertTrue(getString(r, 1).equals("scalar2")); + assertTrue(getString(r, 2).equals("scalar1")); + assertTrue(getString(r, 3).equals("scalar1")); + assertFalse(r.isRepeating); + + // repeating case for first (boolean flag) argument to IF + batch = getBatch1Long3BytesVectors(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + r = (BytesColumnVector) batch.cols[3]; + assertTrue(r.isRepeating); + assertTrue(getString(r, 0).equals("scalar2")); + } +}