diff --git a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java index 15c01f7..76e9f59 100644 --- a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java +++ b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java @@ -216,6 +216,43 @@ {"FilterColumnCompareColumn", "GreaterEqual", "long", "long", ">="}, {"FilterColumnCompareColumn", "GreaterEqual", "double", "long", ">="}, + // template, , , , , , + // + {"ColumnUnaryFunc", "FuncRound", "double", "double", "MathExpr.round", "", ""}, + // round(longCol) returns a long and is a no-op. So it will not be implemented here. + // round(Col, N) is a special case and will be implemented separately from this template + {"ColumnUnaryFunc", "FuncFloor", "long", "double", "Math.floor", "", "(long)"}, + // Note: floor(long) is a no-op so code generation should remove it or use + // an IdentityExpression + {"ColumnUnaryFunc", "FuncCeil", "long", "double", "Math.ceil", "", "(long)"}, + // Similarly, ceil(long) is a no-op, so not generating code for it here + {"ColumnUnaryFunc", "FuncExp", "double", "double", "Math.exp", "", ""}, + {"ColumnUnaryFunc", "FuncLn", "double", "double", "Math.log", "", ""}, + {"ColumnUnaryFunc", "FuncLn", "double", "long", "Math.log", "(double)", ""}, + {"ColumnUnaryFunc", "FuncLog10", "double", "double", "Math.log10", "", ""}, + {"ColumnUnaryFunc", "FuncLog10", "double", "long", "Math.log10", "(double)", ""}, + // The MathExpr class contains helper functions for cases when existing library + // routines can't be used directly. 
+ {"ColumnUnaryFunc", "FuncLog2", "double", "double", "MathExpr.log2", "", ""}, + {"ColumnUnaryFunc", "FuncLog2", "double", "long", "MathExpr.log2", "(double)", ""}, + // Log(base, Col) is a special case and will be implemented separately from this template + // Pow(col, P) and Power(col, P) are special cases implemented separately from this template + {"ColumnUnaryFunc", "FuncSqrt", "double", "double", "Math.sqrt", "", ""}, + {"ColumnUnaryFunc", "FuncSqrt", "double", "long", "Math.sqrt", "(double)", ""}, + {"ColumnUnaryFunc", "FuncAbs", "double", "double", "Math.abs", "", ""}, + {"ColumnUnaryFunc", "FuncAbs", "long", "long", "MathExpr.abs", "", ""}, + {"ColumnUnaryFunc", "FuncSin", "double", "double", "Math.sin", "", ""}, + {"ColumnUnaryFunc", "FuncASin", "double", "double", "Math.asin", "", ""}, + {"ColumnUnaryFunc", "FuncCos", "double", "double", "Math.cos", "", ""}, + {"ColumnUnaryFunc", "FuncACos", "double", "double", "Math.acos", "", ""}, + {"ColumnUnaryFunc", "FuncTan", "double", "double", "Math.tan", "", ""}, + {"ColumnUnaryFunc", "FuncATan", "double", "double", "Math.atan", "", ""}, + {"ColumnUnaryFunc", "FuncDegrees", "double", "double", "Math.toDegrees", "", ""}, + {"ColumnUnaryFunc", "FuncRadians", "double", "double", "Math.toRadians", "", ""}, + {"ColumnUnaryFunc", "FuncSign", "double", "double", "MathExpr.sign", "", ""}, + {"ColumnUnaryFunc", "FuncSign", "double", "long", "MathExpr.sign", "", ""}, + + {"ColumnUnaryMinus", "long"}, {"ColumnUnaryMinus", "double"}, @@ -355,6 +392,8 @@ private void generate() throws Exception { generateColumnArithmeticColumn(tdesc); } else if (tdesc[0].equals("ColumnUnaryMinus")) { generateColumnUnaryMinus(tdesc); + } else if (tdesc[0].equals("ColumnUnaryFunc")) { + generateColumnUnaryFunc(tdesc); } else if (tdesc[0].equals("VectorUDAFMinMax")) { generateVectorUDAFMinMax(tdesc); } else if (tdesc[0].equals("VectorUDAFMinMaxString")) { @@ -541,6 +580,33 @@ private void generateColumnUnaryMinus(String[] tdesc) throws 
IOException { writeFile(outputFile, templateString); } + // template, , , , , , + private void generateColumnUnaryFunc(String[] tdesc) throws IOException { + String classNamePrefix = tdesc[1]; + String operandType = tdesc[3]; + String inputColumnVectorType = this.getColumnVectorType(operandType); + String returnType = tdesc[2]; + String outputColumnVectorType = this.getColumnVectorType(returnType); + String className = classNamePrefix + getCamelCaseType(operandType) + "To" + + getCamelCaseType(returnType); + String outputFile = joinPath(this.expressionOutputDirectory, className + ".java"); + String templateFile = joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"); + String templateString = readFile(templateFile); + String funcName = tdesc[4]; + String operandCast = tdesc[5]; + String resultCast = tdesc[6]; + // Expand, and write result + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", inputColumnVectorType); + templateString = templateString.replaceAll("", outputColumnVectorType); + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", returnType); + templateString = templateString.replaceAll("", funcName); + templateString = templateString.replaceAll("", operandCast); + templateString = templateString.replaceAll("", resultCast); + writeFile(outputFile, templateString); + } + private void generateColumnArithmeticColumn(String [] tdesc) throws IOException { String operatorName = tdesc[1]; String operandType1 = tdesc[2]; diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt new file mode 100644 index 0000000..8ad3d90 --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryFunc.txt @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.MathExpr; +import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +public class extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + public (int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + public () { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + inputColVector = () batch.cols[colNum]; + outputColVector = () batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + [] vector = inputColVector.vector; + [] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + //All must be selected otherwise size would 
be zero + //Repeating property will not change. + outputVector[0] = ( vector[0]); + // Even if there are no nulls, we always copy over entry 0. Simplifies code. + outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = ( vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = ( vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = ( vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = ( vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return ""; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } +} diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt index 085145a..dbcee4c 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnUnaryMinus.txt @@ -33,11 +33,13 @@ public class extends VectorExpression { private int outputColumn; public (int colNum, int outputColumn) { + this(); this.colNum = colNum; this.outputColumn = outputColumn; } public () { + super(); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java new file mode 100644 index 0000000..5400395 
--- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +/** + * Vectorized implementation of Bin(long) function that returns string. 
+ */ +public class FuncBin extends FuncLongToString { + private static final long serialVersionUID = 1L; + + FuncBin(int inputCol, int outputCol) { + super(inputCol, outputCol); + } + + FuncBin() { + super(); + } + + @Override + void prepareResult(int i, long[] vector, BytesColumnVector outV) { + long num = vector[i]; + // Extract the bits of num into bytes[] from right to left + int len = 0; + do { + len++; + bytes[bytes.length - len] = (byte) ('0' + (num & 1)); + num >>>= 1; + } while (num != 0); + outV.setVal(i, bytes, bytes.length - len, len); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java new file mode 100644 index 0000000..985deff --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +// Vectorized implementation of Hex(long) that returns string +public class FuncHex extends FuncLongToString { + private static final long serialVersionUID = 1L; + + FuncHex(int inputCol, int outputCol) { + super(inputCol, outputCol); + } + + FuncHex() { + super(); + } + + @Override + void prepareResult(int i, long[] vector, BytesColumnVector outV) { + long num = vector[i]; + // Extract the bits of num into bytes[] from right to left + int len = 0; + do { + len++; + bytes[bytes.length - len] = (byte) Character.toUpperCase(Character + .forDigit((int) (num & 0xF), 16)); + num >>>= 4; + } while (num != 0); + outV.setVal(i, bytes, bytes.length - len, len); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java new file mode 100644 index 0000000..d8243f6 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +public class FuncLogWithBaseDoubleToDouble extends MathFuncDoubleToDouble { + private static final long serialVersionUID = 1L; + + private double base; + + FuncLogWithBaseDoubleToDouble(int colNum, double base, int outputColumn) { + super(colNum, outputColumn); + this.base = base; + } + + FuncLogWithBaseDoubleToDouble() { + super(); + } + + @Override + double func(double d) { + return Math.log(d) / Math.log(base); + } + + public double getBase() { + return base; + } + + public void setBase(double base) { + this.base = base; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java new file mode 100644 index 0000000..cb9d4d1 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLongToString.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Superclass to support vectorized functions that take a long + * and return a string, optionally with additional configuraiton arguments. + * Used for bin(long), hex(long) etc. + */ +public abstract class FuncLongToString extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int inputCol; + private int outputCol; + protected transient byte[] bytes; + + FuncLongToString(int inputCol, int outputCol) { + this.inputCol = inputCol; + this.outputCol = outputCol; + bytes = new byte[64]; // staging area for results, to avoid new() calls + } + + FuncLongToString() { + bytes = new byte[64]; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputCol]; + int[] sel = batch.selected; + int n = batch.size; + long[] vector = inputColVector.vector; + BytesColumnVector outV = (BytesColumnVector) batch.cols[outputCol]; + outV.initBuffer(); + + if (n == 0) { + //Nothing to do + return; + } + + if (inputColVector.noNulls) { + outV.noNulls = true; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + prepareResult(0, vector, outV); + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + prepareResult(i, vector, outV); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + prepareResult(i, vector, outV); + } + outV.isRepeating = false; + } + } else { + // Handle case with nulls. Don't do function if the value is null, to save time, + // because calling the function can be expensive. 
+ outV.noNulls = false; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inputColVector.isNull[0]; + if (!inputColVector.isNull[0]) { + prepareResult(0, vector, outV); + } + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!inputColVector.isNull[i]) { + prepareResult(i, vector, outV); + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + prepareResult(i, vector, outV); + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } + } + } + + /* Evaluate result for position i (using bytes[] to avoid storage allocation costs) + * and set position i of the output vector to the result. + */ + abstract void prepareResult(int i, long[] vector, BytesColumnVector outV); + + @Override + public int getOutputColumn() { + return outputCol; + } + + public int getOutputCol() { + return outputCol; + } + + public void setOutputCol(int outputCol) { + this.outputCol = outputCol; + } + + public int getInputCol() { + return inputCol; + } + + public void setInputCol(int inputCol) { + this.inputCol = inputCol; + } + + @Override + public String getOutputType() { + return "String"; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java new file mode 100644 index 0000000..8d25489 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +/** + * Vectorized implementation for Pow(a, power) and Power(a, power) + */ +public class FuncPowerDoubleToDouble extends MathFuncDoubleToDouble { + private static final long serialVersionUID = 1L; + + private double power; + + FuncPowerDoubleToDouble(int colNum, double power, int outputColumn) { + super(colNum, outputColumn); + this.power = power; + } + + FuncPowerDoubleToDouble() { + super(); + } + + @Override + public double func(double d) { + return Math.pow(d, power); + } + + public double getPower() { + return power; + } + + public void setPower(double power) { + this.power = power; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java new file mode 100644 index 0000000..0a05352 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import java.util.Random;

/**
 * Implements vectorized rand() and rand(seed) function evaluation.
 * Every row in play of the output column receives the next double from the generator.
 */
public class FuncRand extends VectorExpression {
  private static final long serialVersionUID = 1L;

  private int outputCol;

  // Null until first use in the no-seed case; see evaluate().
  private Random random;

  FuncRand() {
  }

  FuncRand(int outputCol) {
    this.outputCol = outputCol;
  }

  FuncRand(long seed, int outputCol) {
    this(outputCol);
    this.random = new Random(seed);
  }

  @Override
  public void evaluate(VectorizedRowBatch batch) {

    if (childExpressions != null) {
      this.evaluateChildren(batch);
    }

    DoubleColumnVector out = (DoubleColumnVector) batch.cols[outputCol];
    int[] selected = batch.selected;
    int size = batch.size;
    double[] dst = out.vector;

    // The output never contains nulls and is never repeating.
    out.noNulls = true;
    out.isRepeating = false;

    // return immediately if batch is empty
    if (size == 0) {
      return;
    }

    // For the no-seed case, the generator is created lazily on first use.
    if (random == null) {
      random = new Random();
    }

    if (!batch.selectedInUse) {
      for (int row = 0; row != size; row++) {
        dst[row] = random.nextDouble();
      }
      return;
    }
    for (int j = 0; j != size; j++) {
      dst[selected[j]] = random.nextDouble();
    }
  }

  @Override
  public int getOutputColumn() {
    return outputCol;
  }

  @Override
  public String getOutputType() {
    return "double";
  }

  public int getOutputCol() {
    return outputCol;
  }

  public void setOutputCol(int outputCol) {
    this.outputCol = outputCol;
  }

  public Random getRandom() {
    return random;
  }

  public void setRandom(Random random) {
    this.random = random;
  }
}
/**
 * Math expression evaluation helper functions.
 * Some of these are referenced from ColumnUnaryFunc.txt.
 */
public class MathExpr {

  // From this magnitude upwards (2^52) every double is already a whole number,
  // so rounding must return the value unchanged.
  private static final double INTEGRAL_THRESHOLD = (double) (1L << 52);

  // ln(2), computed once instead of on every log2 call.
  private static final double LN_2 = Math.log(2);

  /**
   * Rounds to the nearest whole number, with ties rounded away from zero
   * (the "half-up" method used in Hive).
   *
   * NaN, infinities and values whose magnitude is at least 2^52 are returned
   * unchanged. Going through a (long) cast, as an earlier version did, turned
   * NaN into 0.0, saturated huge values at the long range, and mis-rounded
   * values for which d + 0.5 is not exactly representable.
   *
   * @param d the value to round
   * @return the rounded value; never -0.0 for finite inputs below 2^52 in magnitude
   */
  public static double round(double d) {
    if (Double.isNaN(d) || Double.isInfinite(d) || Math.abs(d) >= INTEGRAL_THRESHOLD) {
      return d;
    }
    if (d > 0.0) {
      double lower = Math.floor(d);
      // d - lower is the exact fractional part, so the tie test is exact.
      return (d - lower >= 0.5d) ? lower + 1.0d : lower;
    }
    double upper = Math.ceil(d);
    double rounded = (upper - d >= 0.5d) ? upper - 1.0d : upper;
    // Adding +0.0 maps a -0.0 result to 0.0, matching the cast-based behavior.
    return rounded + 0.0d;
  }

  /**
   * Returns the base-2 logarithm of d, computed as ln(d) / ln(2).
   */
  public static double log2(double d) {
    return Math.log(d) / LN_2;
  }

  /**
   * Returns the absolute value of v. As with Math.abs(long), Long.MIN_VALUE
   * has no positive counterpart and is returned unchanged.
   */
  public static long abs(long v) {
    return v >= 0 ? v : -v;
  }

  /**
   * Returns 1.0 for positive v, 0.0 for zero (either sign) and -1.0 otherwise.
   * NaN compares false against everything and therefore yields -1.0.
   */
  public static double sign(double v) {
    if (v == 0) {
      return 0.0;
    }
    return v > 0 ? 1.0 : -1.0;
  }

  /**
   * Returns 1.0 for positive v, 0.0 for zero and -1.0 for negative v.
   */
  public static double sign(long v) {
    if (v == 0) {
      return 0.0;
    }
    return v > 0 ? 1.0 : -1.0;
  }
}
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Implement vectorized math function that takes a double (and optionally additional + * constant argument(s)) and returns long. + * May be used for functions like ROUND(d, N), Pow(a, p) etc. + * + * Do NOT use this for simple math functions lone sin/cos/exp etc. that just take + * a single argument. For those, modify the template ColumnUnaryFunc.txt + * and expand the template to generate needed classes. + */ +public abstract class MathFuncDoubleToDouble extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int colNum; + private int outputColumn; + + // Subclasses must override this with a function that implements the desired logic. + abstract double func(double d); + + MathFuncDoubleToDouble(int colNum, int outputColumn) { + this.colNum = colNum; + this.outputColumn = outputColumn; + } + + MathFuncDoubleToDouble() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + this.evaluateChildren(batch); + } + + DoubleColumnVector inputColVector = (DoubleColumnVector) batch.cols[colNum]; + DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] inputIsNull = inputColVector.isNull; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = inputColVector.noNulls; + int n = batch.size; + double[] vector = inputColVector.vector; + double[] outputVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.isRepeating) { + outputVector[0] = func(vector[0]); + + // Even if there are no nulls, we always copy over entry 0. Simplifies code. 
+ outputIsNull[0] = inputIsNull[0]; + outputColVector.isRepeating = true; + } else if (inputColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = func(vector[i]); + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = func(vector[i]); + } + } + outputColVector.isRepeating = false; + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outputVector[i] = func(vector[i]); + outputIsNull[i] = inputIsNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outputVector[i] = func(vector[i]); + } + System.arraycopy(inputIsNull, 0, outputIsNull, 0, n); + } + outputColVector.isRepeating = false; + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + @Override + public String getOutputType() { + return "double"; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundDoubleToDoubleWithNumDigits.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundDoubleToDoubleWithNumDigits.java new file mode 100644 index 0000000..af75b66 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/RoundDoubleToDoubleWithNumDigits.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.udf.UDFRound; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.IntWritable; + +// Vectorized implementation of ROUND(Col, N) function +public class RoundDoubleToDoubleWithNumDigits extends MathFuncDoubleToDouble { + private static final long serialVersionUID = 1L; + + private IntWritable decimalPlaces; /* second argument handed to roundFunc.evaluate() in func() */ + private UDFRound roundFunc; /* row-mode UDF that func() delegates the rounding to */ + private transient DoubleWritable dw; /* holder refilled by func() for every input value */ + + RoundDoubleToDoubleWithNumDigits(int colNum, int decimalPlaces, int outputColumn) { + super(colNum, outputColumn); + this.decimalPlaces = new IntWritable(); + this.decimalPlaces.set(decimalPlaces); + roundFunc = new UDFRound(); + dw = new DoubleWritable(); + } + + RoundDoubleToDoubleWithNumDigits() { /* NOTE(review): decimalPlaces and roundFunc are not set here; presumably setDecimalPlaces and setRoundFunc are called before func() is used - TODO confirm */ + super(); + dw = new DoubleWritable(); + } + + // Round to the specified number of decimal places using the standard Hive round function. 
+ @Override + public double func(double d) { + dw.set(d); /* reuse the dw holder rather than allocating a DoubleWritable per value */ + return roundFunc.evaluate(dw, decimalPlaces).get(); + } + + void setDecimalPlaces(IntWritable decimalPlaces) { + this.decimalPlaces = decimalPlaces; + } + + IntWritable getDecimalPlaces() { + return this.decimalPlaces; + } + + void setRoundFunc(UDFRound roundFunc) { + this.roundFunc = roundFunc; + } + + UDFRound getRoundFunc() { + return this.roundFunc; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConv.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConv.java new file mode 100644 index 0000000..2e62531 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConv.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.hive.ql.udf.UDFConv; +import org.apache.hadoop.io.IntWritable; + + +/** + * Implement vectorized function conv(string, int, int) returning string. 
+ * Support for use on numbers instead of strings shall be implemented + * by inserting an explicit cast to string. There will not be VectorExpression + * classes specifically for conv applied to numbers. + */ +public class StringConv extends StringUnaryUDF { + private static final long serialVersionUID = 1L; + + StringConv(int colNum, int outputColumn, int fromBase, int toBase) { + super(colNum, outputColumn, (IUDFUnaryString) new ConvWrapper(fromBase, toBase)); + } + + StringConv() { + super(); + } + + /* This wrapper class implements the evaluate() method expected + * by the superclass for use in the inner loop of vectorized expression + * evaluation. It holds the fromBase and toBase arguments to + * make the interface simply "Text evaluate(Text)" as expected. + */ + static class ConvWrapper implements IUDFUnaryString { + UDFConv conv; + IntWritable fromBase; + IntWritable toBase; + + ConvWrapper(int fromBase, int toBase) { + conv = new UDFConv(); + this.fromBase = new IntWritable(fromBase); + this.toBase = new IntWritable(toBase); + } + + @Override + public Text evaluate(Text s) { + return conv.evaluate(s, fromBase, toBase); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringHex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringHex.java new file mode 100644 index 0000000..0e7384d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringHex.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF; +import org.apache.hadoop.hive.ql.udf.UDFHex; + +// Vectorized Hex(string) returning string; each value is delegated to UDFHex via StringUnaryUDF +public class StringHex extends StringUnaryUDF { + private static final long serialVersionUID = 1L; + + StringHex(int colNum, int outputColumn) { + super(colNum, outputColumn, (IUDFUnaryString) new UDFHex()); + } + + // No-argument constructor, mirroring StringConv and StringUnhex. + StringHex() { + super(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 7dab821..8c2a348 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import java.util.Arrays; + import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.io.Text; @@ -65,6 +67,7 @@ public void evaluate(VectorizedRowBatch batch) { int [] start = inputColVector.start; int [] length = inputColVector.length; BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + outV.initBuffer(); Text t; if (n == 0) { @@ -79,25 +82,33 @@ public void evaluate(VectorizedRowBatch batch) { 
// existing built-in function. 
if (inputColVector.noNulls) { - outV.noNulls = true; + outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; s.set(vector[0], start[0], length[0]); t = func.evaluate(s); - outV.setRef(0, t.getBytes(), 0, t.getLength()); + setString(outV, 0, t); } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; + + /* Fill output isNull with false for selected elements since there is a chance we'll + * convert to noNulls == false in setString(); + */ + outV.isNull[i] = false; s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - outV.setRef(i, t.getBytes(), 0, t.getLength()); + setString(outV, i, t); } outV.isRepeating = false; } else { + + // Set all n elements to not null (the fill's end index is exclusive). The setString call can override this. + Arrays.fill(outV.isNull, 0, n, false); for(int i = 0; i != n; i++) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - outV.setRef(i, t.getBytes(), 0, t.getLength()); + setString(outV, i, t); } outV.isRepeating = false; } @@ -107,36 +118,50 @@ public void evaluate(VectorizedRowBatch batch) { outV.noNulls = false; if (inputColVector.isRepeating) { outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; + outV.isNull[0] = inputColVector.isNull[0]; // setString can override this if (!inputColVector.isNull[0]) { s.set(vector[0], start[0], length[0]); t = func.evaluate(s); - outV.setRef(0, t.getBytes(), 0, t.getLength()); + setString(outV, 0, t); } } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { + for(int j = 0; j != n; j++) { int i = sel[j]; + outV.isNull[i] = inputColVector.isNull[i]; // setString can override this if (!inputColVector.isNull[i]) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - outV.setRef(i, t.getBytes(), 0, t.getLength()); - } - outV.isNull[i] = inputColVector.isNull[i]; + setString(outV, i, t); + } } outV.isRepeating = false; } else { + + // setString can override this null propagation + 
System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n); for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { s.set(vector[i], start[i], length[i]); t = func.evaluate(s); - outV.setRef(i, t.getBytes(), 0, t.getLength()); + setString(outV, i, t); } - outV.isNull[i] = inputColVector.isNull[i]; } outV.isRepeating = false; } } } + + /* Set the output string entry i to the contents of Text object t. + * If t is a null object reference, record that the value is a SQL NULL. + */ + private static void setString(BytesColumnVector outV, int i, Text t) { + if (t == null) { + outV.noNulls = false; + outV.isNull[i] = true; + return; + } + outV.setVal(i, t.getBytes(), 0, t.getLength()); + } @Override public int getOutputColumn() { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnhex.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnhex.java new file mode 100644 index 0000000..7193ecf --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnhex.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF.IUDFUnaryString; +import org.apache.hadoop.hive.ql.udf.UDFUnhex; +import org.apache.hadoop.io.Text; + +public class StringUnhex extends StringUnaryUDF { + private static final long serialVersionUID = 1L; + + StringUnhex(int colNum, int outputColumn) { + super(colNum, outputColumn, new IUDFUnaryString() { + private final UDFUnhex unhex = new UDFUnhex(); // created once per expression instead of once per row + + // Wrap the evaluate method of UDFUnhex to make it return the expected type, Text; a null byte[] result is passed through as null. + @Override + public Text evaluate(Text s) { + byte[] b = unhex.evaluate(s); + if (b == null) { + return null; + } + return new Text(b); + } + + }); + } + + StringUnhex() { + super(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java index d52afd4..fdb4436 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFHex.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringUnaryUDF.IUDFUnaryString; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -39,7 +40,7 @@ + " 'H1'\n" + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " '46616365626F6F6B'") -public class UDFHex extends UDF { +public class UDFHex extends UDF implements IUDFUnaryString { private final Text result = new Text(); private byte[] value = new byte[16]; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java new file mode 100644 index 0000000..9646297 --- /dev/null +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -0,0 +1,569 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.UnsupportedEncodingException; +import java.util.Arrays; + +import junit.framework.Assert; + +import org.apache.hadoop.hbase.client.coprocessor.Batch; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.*; +import org.junit.Test; + + +public class TestVectorMathFunctions { + + private static final double eps = 1.0e-7; + private static boolean equalsWithinTolerance(double a, double b) { + return Math.abs(a - b) < eps; + } + + @Test + public void testVectorRound() { + VectorizedRowBatch b = 
getVectorizedRowBatchDoubleInDoubleOut(); + VectorExpression expr = new FuncRoundDoubleToDouble(0, 1); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + expr.evaluate(b); + Assert.assertEquals(-2d, resultV.vector[0]); + Assert.assertEquals(-1d, resultV.vector[1]); + Assert.assertEquals(0d, resultV.vector[2]); + Assert.assertEquals(0d, resultV.vector[3]); + Assert.assertEquals(1d, resultV.vector[4]); + Assert.assertEquals(1d, resultV.vector[5]); + Assert.assertEquals(2d, resultV.vector[6]); + + // spot check null propagation + b.cols[0].noNulls = false; + b.cols[0].isNull[3] = true; + resultV.noNulls = true; + expr.evaluate(b); + Assert.assertEquals(true, resultV.isNull[3]); + Assert.assertEquals(false, resultV.noNulls); + + // check isRepeating propagation + b.cols[0].isRepeating = true; + resultV.isRepeating = false; + expr.evaluate(b); + Assert.assertEquals(-2d, resultV.vector[0]); + Assert.assertEquals(true, resultV.isRepeating); + + resultV.isRepeating = false; + b.cols[0].noNulls = true; + expr.evaluate(b); + Assert.assertEquals(-2d, resultV.vector[0]); + Assert.assertEquals(true, resultV.isRepeating); + } + + @Test + public void testRoundToDecimalPlaces() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + VectorExpression expr = new RoundDoubleToDoubleWithNumDigits(0, 4, 1); + expr.evaluate(b); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + + // Verify result is rounded to 4 digits + Assert.assertEquals(1.2346d, resultV.vector[7]); + } + + public static VectorizedRowBatch getVectorizedRowBatchDoubleInLongOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + LongColumnVector lcv; + DoubleColumnVector dcv; + lcv = new LongColumnVector(); + dcv = new DoubleColumnVector(); + dcv.vector[0] = -1.5d; + dcv.vector[1] = -0.5d; + dcv.vector[2] = -0.1d; + dcv.vector[3] = 0d; + dcv.vector[4] = 0.5d; + dcv.vector[5] = 0.7d; + dcv.vector[6] = 1.5d; + + batch.cols[0] = dcv; + 
batch.cols[1] = lcv; + + batch.size = 7; + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchDoubleInDoubleOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + DoubleColumnVector inV; + DoubleColumnVector outV; + outV = new DoubleColumnVector(); + inV = new DoubleColumnVector(); + inV.vector[0] = -1.5d; + inV.vector[1] = -0.5d; + inV.vector[2] = -0.1d; + inV.vector[3] = 0d; + inV.vector[4] = 0.5d; + inV.vector[5] = 0.7d; + inV.vector[6] = 1.5d; + inV.vector[7] = 1.2345678d; + + batch.cols[0] = inV; + batch.cols[1] = outV; + + batch.size = 8; + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchLongInDoubleOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + LongColumnVector lcv; + DoubleColumnVector dcv; + lcv = new LongColumnVector(); + dcv = new DoubleColumnVector(); + lcv.vector[0] = -2; + lcv.vector[1] = -1; + lcv.vector[2] = 0; + lcv.vector[3] = 1; + lcv.vector[4] = 2; + + batch.cols[0] = lcv; + batch.cols[1] = dcv; + + batch.size = 5; + return batch; + } + + public static VectorizedRowBatch getVectorizedRowBatchLongInLongOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + LongColumnVector inV, outV; + inV = new LongColumnVector(); + outV = new LongColumnVector(); + inV.vector[0] = -2; + inV.vector[1] = 2; + + batch.cols[0] = inV; + batch.cols[1] = outV; + + batch.size = 2; + return batch; + } + + public static VectorizedRowBatch getBatchForStringMath() { + VectorizedRowBatch batch = new VectorizedRowBatch(3); + LongColumnVector inL; + BytesColumnVector inS, outS; + inL = new LongColumnVector(); + inS = new BytesColumnVector(); + outS = new BytesColumnVector(); + inL.vector[0] = 0; + inL.vector[1] = 255; + inL.vector[2] = 0; + inS.initBuffer(); + try { + inS.setVal(0, "00".getBytes("UTF-8"), 0, 2); + inS.setVal(1, "3232".getBytes("UTF-8"), 0, 4); + byte[] bad = "bad data".getBytes("UTF-8"); + inS.setVal(2, bad, 0, bad.length); + } catch (UnsupportedEncodingException e) { 
+ e.printStackTrace(); + Assert.assertTrue(false); + } + + batch.cols[0] = inS; + batch.cols[1] = inL; + batch.cols[2] = outS; + + batch.size = 3; + return batch; + } + + /* + * The following tests spot-check that vectorized functions with signature + * DOUBLE func(DOUBLE) that came from template ColumnUnaryFunc.txt + * get the right result. Null propagation, isRepeating + * propagation will be checked once for a single expansion of the template + * (for FuncRoundDoubleToDouble). + */ + @Test + public void testVectorSin() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncSinDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.sin(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorCos() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncCosDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.cos(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorTan() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncTanDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.tan(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorASin() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncASinDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.asin(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorACos() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = 
(DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncACosDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.acos(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorATan() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncATanDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.atan(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorDegrees() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncDegreesDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.toDegrees(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorRadians() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncRadiansDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.toRadians(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorFloor() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInLongOut(); + LongColumnVector resultV = (LongColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncFloorDoubleToLong(0, 1); + expr.evaluate(b); + Assert.assertEquals(-2, resultV.vector[0]); + Assert.assertEquals(1, resultV.vector[6]); + } + + @Test + public void testVectorCeil() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInLongOut(); + LongColumnVector resultV = (LongColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncCeilDoubleToLong(0, 1); + expr.evaluate(b); + Assert.assertEquals(-1, resultV.vector[0]); + Assert.assertEquals(2, resultV.vector[6]); + } + + 
@Test + public void testVectorExp() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncExpDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.exp(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorLn() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncLnDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.log(0.5), resultV.vector[4]); + + // test long->double version + b = getVectorizedRowBatchLongInDoubleOut(); + resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + expr = new FuncLnLongToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.log(2), resultV.vector[4]); + } + + @Test + public void testVectorLog2() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncLog2DoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.log(0.5d) / Math.log(2), resultV.vector[4]); + + // test long->double version + b = getVectorizedRowBatchLongInDoubleOut(); + resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + expr = new FuncLog2LongToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.log(1) / Math.log(2), resultV.vector[3]); + } + + @Test + public void testVectorLog10() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncLog10DoubleToDouble(0, 1); + expr.evaluate(b); + 
Assert.assertTrue(equalsWithinTolerance(Math.log(0.5d) / Math.log(10), resultV.vector[4])); + + // test long->double version + b = getVectorizedRowBatchLongInDoubleOut(); + resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + expr = new FuncLog10LongToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.log(1) / Math.log(10), resultV.vector[3]); + } + + @Test + public void testVectorRand() { + VectorizedRowBatch b = new VectorizedRowBatch(1); + DoubleColumnVector v = new DoubleColumnVector(); + b.cols[0] = v; + b.size = VectorizedRowBatch.DEFAULT_SIZE; + int n = b.size; + v.noNulls = true; + VectorExpression expr = new FuncRand(0); + expr.evaluate(b); + double sum = 0; + for(int i = 0; i != n; i++) { + sum += v.vector[i]; + Assert.assertTrue(v.vector[i] >= 0.0 && v.vector[i] <= 1.0); + } + double avg = sum / n; + + /* The random values must be between 0 and 1, distributed uniformly. + * So the average value of a large set should be about 0.5. Verify it is + * close to this value. + */ + Assert.assertTrue(avg > 0.3 && avg < 0.7); + + // Now, test again with a seed. 
+ Arrays.fill(v.vector, 0); + expr = new FuncRand(99999, 0); + expr.evaluate(b); + sum = 0; + for(int i = 0; i != n; i++) { + sum += v.vector[i]; + Assert.assertTrue(v.vector[i] >= 0.0 && v.vector[i] <= 1.0); + } + avg = sum / n; + Assert.assertTrue(avg > 0.3 && avg < 0.7); + } + + @Test + public void testVectorLogBase() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncLogWithBaseDoubleToDouble(0, 10.0d, 1); + expr.evaluate(b); + Assert.assertTrue(equalsWithinTolerance(Math.log(0.5d) / Math.log(10), resultV.vector[4])); + } + + @Test + public void testVectorPower() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncPowerDoubleToDouble(0, 2.0d, 1); + expr.evaluate(b); + Assert.assertTrue(equalsWithinTolerance(0.5d * 0.5d, resultV.vector[4])); + } + + @Test + public void testVectorSqrt() { + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncSqrtDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(Math.sqrt(0.5d), resultV.vector[4]); + } + + @Test + public void testVectorAbs() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncAbsDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(1.5, resultV.vector[0]); + Assert.assertEquals(0.5, resultV.vector[4]); + + // test long->long version + b = getVectorizedRowBatchLongInLongOut(); + LongColumnVector resultVLong = (LongColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + 
expr = new FuncAbsLongToLong(0, 1); + expr.evaluate(b); + Assert.assertEquals(2, resultVLong.vector[0]); + Assert.assertEquals(2, resultVLong.vector[1]); + } + + @Test + public void testVectorSign() { + + // test double->double version + VectorizedRowBatch b = getVectorizedRowBatchDoubleInDoubleOut(); + DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new FuncSignDoubleToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(-1.0d, resultV.vector[0]); + Assert.assertEquals(1.0d, resultV.vector[4]); + + // test long->double version + b = getVectorizedRowBatchLongInDoubleOut(); + resultV = (DoubleColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + expr = new FuncSignLongToDouble(0, 1); + expr.evaluate(b); + Assert.assertEquals(-1.0d, resultV.vector[0]); + Assert.assertEquals(1.0d, resultV.vector[4]); + } + + @Test + public void testVectorBin() { + + // test conversion of long->string; column 1 holds the long input and column 2 receives the result + VectorizedRowBatch b = getBatchForStringMath(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[2]; + b.cols[1].noNulls = true; + VectorExpression expr = new FuncBin(1, 2); + expr.evaluate(b); + String s = new String(resultV.vector[1], resultV.start[1], resultV.length[1]); + Assert.assertEquals("11111111", s); + } + + @Test + public void testVectorHex() { + + // test long->string version + VectorizedRowBatch b = getBatchForStringMath(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[2]; + b.cols[1].noNulls = true; + VectorExpression expr = new FuncHex(1, 2); + expr.evaluate(b); + String s = new String(resultV.vector[1], resultV.start[1], resultV.length[1]); + Assert.assertEquals("FF", s); + + // test string->string version + b = getBatchForStringMath(); + resultV = (BytesColumnVector) b.cols[2]; + b.cols[0].noNulls = true; + expr = new StringHex(0, 2); + expr.evaluate(b); + s = new String(resultV.vector[1], resultV.start[1], resultV.length[1]); + Assert.assertEquals("33323332", s); + } + + @Test + 
public void testVectorUnhex() { + + // test string->string version + VectorizedRowBatch b = getBatchForStringMath(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[2]; + b.cols[0].noNulls = true; + resultV.noNulls = true; + VectorExpression expr = new StringUnhex(0, 2); + expr.evaluate(b); + String s = new String(resultV.vector[1], resultV.start[1], resultV.length[1]); + Assert.assertEquals("22", s); + + // check for NULL output for entry 2 ("bad data") + Assert.assertEquals(false, resultV.noNulls); + Assert.assertEquals(true, resultV.isNull[2]); + } + + @Test + public void testVectorConv() { + + // test string->string version + VectorizedRowBatch b = getBatchForStringMath(); + BytesColumnVector resultV = (BytesColumnVector) b.cols[2]; + b.cols[0].noNulls = true; + resultV.noNulls = true; + VectorExpression expr = new StringConv(0, 2, 10, 16); + expr.evaluate(b); + String s[] = new String[3]; + try { + s[0] = new String(resultV.vector[0], resultV.start[0], resultV.length[0], "UTF-8"); + s[1] = new String(resultV.vector[1], resultV.start[1], resultV.length[1], "UTF-8"); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + Assert.assertEquals("0", s[0]); + Assert.assertEquals("CA0", s[1]); + + // Note: the documentation says that "bad data" should translate to NULL output, + // but that is not the case in the existing code, which returns "0". So, this test skips + // checking output for element 2. 
+ } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index c94d7c5..1d44abf 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -391,6 +391,7 @@ VectorizedRowBatch makeStringBatchMixedCase() { BytesColumnVector v = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); batch.cols[0] = v; BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + outV.initBuffer(); batch.cols[1] = outV; /* * Add these 3 values: