diff --git a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java index 2ee0451..99262ec 100644 --- a/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java +++ b/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java @@ -276,6 +276,11 @@ {"FilterColumnCompareColumn", "GreaterEqual", "long", "long", ">="}, {"FilterColumnCompareColumn", "GreaterEqual", "double", "long", ">="}, + {"FilterColumnBetween", "long", ""}, + {"FilterColumnBetween", "double", ""}, + {"FilterColumnBetween", "long", "!"}, + {"FilterColumnBetween", "double", "!"}, + {"ColumnCompareColumn", "Equal", "long", "double", "=="}, {"ColumnCompareColumn", "Equal", "double", "double", "=="}, {"ColumnCompareColumn", "NotEqual", "long", "double", "!="}, @@ -511,6 +516,8 @@ private void generate() throws Exception { generateFilterColumnCompareScalar(tdesc); } else if (tdesc[0].equals("FilterScalarCompareColumn")) { generateFilterScalarCompareColumn(tdesc); + } else if (tdesc[0].equals("FilterColumnBetween")) { + generateFilterColumnBetween(tdesc); } else if (tdesc[0].equals("ScalarArithmeticColumn")) { generateScalarArithmeticColumn(tdesc); } else if (tdesc[0].equals("FilterColumnCompareColumn")) { @@ -553,6 +560,26 @@ private void generate() throws Exception { testCodeGen.generateTestSuites(); } + private void generateFilterColumnBetween(String[] tdesc) throws IOException { + String operandType = tdesc[1]; + String optionalNot = tdesc[2]; + + String className = "Filter" + getCamelCaseType(operandType) + "Column" + + (optionalNot.equals("!") ? "Not" : "") + "Between"; + String inputColumnVectorType = getColumnVectorType(operandType); + String outputFile = joinPath(this.expressionOutputDirectory, className + ".java"); + + // Read the template into a string, expand it, and write it. + String templateFile = joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt"); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("", className); + templateString = templateString.replaceAll("", inputColumnVectorType); + templateString = templateString.replaceAll("", operandType); + templateString = templateString.replaceAll("", optionalNot); + + writeFile(outputFile, templateString); + } + private void generateColumnCompareColumn(String[] tdesc) throws IOException { //The variables are all same as ColumnCompareScalar except that //this template doesn't need a return type. Pass anything as return type. diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt new file mode 100644 index 0000000..47be94a --- /dev/null +++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetween.txt @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Generated from template FilterColumnBetween.txt, which covers [NOT] BETWEEN filter + * expressions where a column is [NOT] between one scalar and another. + * Output is not produced in a separate column. The selected vector of the input + * {@link VectorizedRowBatch} is updated for in-place filtering. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + + // The comparison is of the form "column BETWEEN leftValue AND rightValue" + private leftValue; + private rightValue; + + public (int colNum, leftValue, rightValue) { + this.colNum = colNum; + this.leftValue = leftValue; + this.rightValue = rightValue; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + inputColVector = () batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + [] vector = inputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. + // Repeating property will not change. + if ((vector[0] < leftValue || vector[0] > rightValue)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if ((leftValue <= vector[i] && vector[i] <= rightValue)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if ((leftValue <= vector[i] && vector[i] <= rightValue)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. + // Repeating property will not change. + if (!nullPos[0]) { + if ((vector[0] < leftValue || vector[0] > rightValue)) { + + // Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if ((leftValue <= vector[i] && vector[i] <= rightValue)) { + sel[newSize++] = i; + } + } + } + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if ((leftValue <= vector[i] && vector[i] <= rightValue)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public getLeftValue() { + return leftValue; + } + + public void setLeftValue( value) { + this.leftValue = value; + } + + public getRightValue() { + return rightValue; + } + + public void setRightValue( value) { + this.leftValue = value; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 79437a5..0ceac59 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -76,6 +76,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterDoubleColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -86,6 +90,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAsin; import org.apache.hadoop.hive.ql.udf.UDFAtan; @@ -160,8 +165,10 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; /** @@ -606,11 +613,98 @@ private VectorExpression getVectorExpression(GenericUDF udf, return getUnaryAbsExpression(childExpr); } else if (udf instanceof GenericUDFTimestamp) { return getCastToTimestamp(childExpr); + } else if (udf instanceof GenericUDFBetween) { + return getBetweenFilterExpression(childExpr); } throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); } + // Get a [NOT] BETWEEN filter expression + private VectorExpression getBetweenFilterExpression(List childExpr) + throws HiveException { + + ExprNodeDesc colExpr = childExpr.get(1); + ExprNodeDesc leftExpr = childExpr.get(2); + ExprNodeDesc rightExpr = childExpr.get(3); + + // Fold constants for - or + operators in front of constant values + leftExpr = foldConstantsForUnaryExpression(leftExpr); + rightExpr = foldConstantsForUnaryExpression(rightExpr); + + VectorExpression v1 = null; + VectorExpression expr = null; + int inputCol; + ExprNodeConstantDesc constDesc; + boolean notKeywordPresent = getBooleanScalar(childExpr.get(0)); + long leftL = Long.MIN_VALUE; + long rightL = Long.MIN_VALUE; + double leftD = Double.NaN; + double rightD = Double.NaN; + + if (isIntFamily(colExpr.getTypeString())) { + leftL = getLongScalar(leftExpr); + rightL = getLongScalar(rightExpr); + } else if (isFloatFamily(colExpr.getTypeString())) { + leftD = getDoubleScalar(leftExpr); + rightD = getDoubleScalar(rightExpr); + } else { + throw new HiveException("Invalid type for vectorized [NOT] BETWEEN"); + } + + // TODO finish the rest + + // It's assumed that the code will not allow us into this function unless + // The left and right arguments are constants and their types match + // the input column or expression type. + + if ((colExpr instanceof ExprNodeColumnDesc) + && isIntFamily(colExpr.getTypeString())) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) colExpr; + inputCol = getInputColumnIndex(colDesc.getColumn()); + if (notKeywordPresent) { + expr = (VectorExpression) new FilterLongColumnNotBetween(inputCol, leftL, rightL); + } else { + expr = (VectorExpression) new FilterLongColumnBetween(inputCol, leftL, rightL); + } + } else if ((colExpr instanceof ExprNodeGenericFuncDesc) + && isIntFamily(colExpr.getTypeString())) { + v1 = getVectorExpression(colExpr); + inputCol = v1.getOutputColumn(); + if (notKeywordPresent) { + expr = (VectorExpression) new FilterLongColumnNotBetween(inputCol, leftL, rightL); + } else { + expr = (VectorExpression) new FilterLongColumnBetween(inputCol, leftL, rightL); + } + } else if ((colExpr instanceof ExprNodeColumnDesc) + && isFloatFamily(colExpr.getTypeString())) { + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) colExpr; + inputCol = getInputColumnIndex(colDesc.getColumn()); + if (notKeywordPresent) { + expr = (VectorExpression) new FilterDoubleColumnNotBetween(inputCol, leftD, rightD); + } else { + expr = (VectorExpression) new FilterDoubleColumnBetween(inputCol, leftD, rightD); + } + } else if ((colExpr instanceof ExprNodeGenericFuncDesc) + && isFloatFamily(colExpr.getTypeString())) { + v1 = getVectorExpression(colExpr); + inputCol = v1.getOutputColumn(); + if (notKeywordPresent) { + expr = (VectorExpression) new FilterDoubleColumnNotBetween(inputCol, leftD, rightD); + } else { + expr = (VectorExpression) new FilterDoubleColumnBetween(inputCol, leftD, rightD); + } + } else { + throw new HiveException("Type " + colExpr.getTypeString() + + "not supported for vectorized BETWEEN"); + } + if (v1 != null) { + expr.setChildExpressions(new VectorExpression [] {v1}); + ocm.freeOutputColumn(v1.getOutputColumn()); + } + return expr; + } + private VectorExpression getUnaryAbsExpression(List childExpr) throws HiveException { String argType = childExpr.get(0).getTypeString(); @@ -1016,6 +1110,21 @@ private long getLongScalar(ExprNodeDesc expr) throws HiveException { + "Expecting integer or bigint"); } + private boolean getBooleanScalar(ExprNodeDesc expr) throws HiveException { + if (!(expr instanceof ExprNodeConstantDesc)) { + throw new HiveException("Constant value expected for UDF argument. " + + "Non-constant argument not supported for vectorization."); + } + ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr; + + if (constExpr.getTypeString().equalsIgnoreCase("boolean")) { + return (Boolean) constExpr.getValue(); + } + + throw new HiveException("Udf: unhandled constant type for scalar argument. " + + "Expecting boolean"); + } + /* Return a vector expression for string concatenation, including the column-scalar, * scalar-column, and column-column cases. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 3e6edb5..8932825 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -125,6 +125,7 @@ import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower; @@ -235,6 +236,7 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFUpper.class); supportedGenericUDFs.add(GenericUDFConcat.class); supportedGenericUDFs.add(GenericUDFAbs.class); + supportedGenericUDFs.add(GenericUDFBetween.class); // For type casts supportedGenericUDFs.add(UDFToLong.class);