diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 3d8ade6..1da6ece 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -31,6 +31,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression; @@ -42,6 +44,8 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFArgDesc; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; @@ -141,6 +145,17 @@ private int getInputColumnIndex(String name) { } } + /* Return true if we are running in the planner, and false if we + * are running in a task. + */ + /* + private boolean isPlanner() { + + // This relies on the behavior that columnMap is null in the planner. + return columnMap == null; + } + */ + private class OutputColumnManager { private final int initialOutputCol; private int outputColCount = 0; @@ -243,8 +258,12 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveEx ve = getVectorExpression((ExprNodeColumnDesc) exprDesc); } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc; - ve = getVectorExpression(expr.getGenericUDF(), - expr.getChildExprs()); + if (isCustomUDF(expr)) { + ve = getCustomUDFExpression(expr); + } else { + ve = getVectorExpression(expr.getGenericUDF(), + expr.getChildExprs()); + } } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc); } @@ -254,6 +273,21 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveEx return ve; } + // Return true if this is a custom UDF or custom GenericUDF. + // This is for use only in the planner. It will fail in a task. + public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) { + String udfName = expr.getFuncText(); + if (udfName == null) { + return false; + } + FunctionInfo funcInfo = FunctionRegistry.getFunctionInfo(udfName); + if (funcInfo == null) { + return false; + } + boolean isNativeFunc = funcInfo.isNative(); + return !isNativeFunc; + } + /** * Handles only the special case of unary operators on a constant. * @param exprDesc @@ -474,6 +508,104 @@ private VectorExpression getVectorExpression(GenericUDFBridge udf, throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported"); } + /* + * Return vector expression for a custom (i.e. not built-in) UDF. 
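+ *
+ * As a sketch of the wiring this produces for a hypothetical call myudf(col0),
+ * mirroring what TestVectorUDFAdaptor sets up by hand:
+ *
+ *   VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
+ *   argDescs[0] = new VectorUDFArgDesc();
+ *   argDescs[0].setVariable(0);   // bind argument 0 to input column 0
+ *   ve = new VectorUDFAdaptor(expr, outputCol, "Long", argDescs);
+ *
+ * The code below generalizes this to constants and nested expressions.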
+ */
+  private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr)
+      throws HiveException {
+
+    //GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
+    List<ExprNodeDesc> childExprList = expr.getChildExprs();
+
+    // argument descriptors
+    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[expr.getChildExprs().size()];
+    for (int i = 0; i < argDescs.length; i++) {
+      argDescs[i] = new VectorUDFArgDesc();
+    }
+
+    // positions of variable arguments (columns or non-constant expressions)
+    List<Integer> variableArgPositions = new ArrayList<Integer>();
+
+    // Column numbers of batch corresponding to expression result arguments
+    List<Integer> exprResultColumnNums = new ArrayList<Integer>();
+
+    // Prepare children
+    List<VectorExpression> vectorExprs = new ArrayList<VectorExpression>();
+
+    for (int i = 0; i < childExprList.size(); i++) {
+      ExprNodeDesc child = childExprList.get(i);
+      if (child instanceof ExprNodeGenericFuncDesc) {
+        VectorExpression e = getVectorExpression(child);
+        vectorExprs.add(e);
+        variableArgPositions.add(i);
+        exprResultColumnNums.add(e.getOutputColumn());
+        argDescs[i].setVariable(e.getOutputColumn());
+      } else if (child instanceof ExprNodeColumnDesc) {
+        variableArgPositions.add(i);
+        argDescs[i].setVariable(getInputColumnIndex(((ExprNodeColumnDesc) child).getColumn()));
+      } else if (child instanceof ExprNodeConstantDesc) {
+
+        // this is a constant
+        argDescs[i].setConstant((ExprNodeConstantDesc) child);
+      } else {
+        throw new HiveException("Unable to vectorize Custom UDF");
+      }
+    }
+
+    // Allocate the output column and get its column number.
+    int outputCol = -1;
+    String resultColVectorType;
+    String resultType = expr.getTypeInfo().getTypeName();
+    if (resultType.equalsIgnoreCase("string")) {
+      resultColVectorType = "String";
+    } else if (isIntFamily(resultType)) {
+      resultColVectorType = "Long";
+    } else if (isFloatFamily(resultType)) {
+      resultColVectorType = "Double";
+    } else if (resultType.equalsIgnoreCase("timestamp")) {
+      resultColVectorType = "Long";
+    } else {
+      throw new HiveException("Unable to vectorize due to unsupported custom UDF return type "
+          + resultType);
+    }
+    outputCol = ocm.allocateOutputColumn(resultColVectorType);
+
+    // Make vectorized operator
+    VectorExpression ve;
+    ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);
+
+    // Set child expressions
+    VectorExpression[] childVEs = null;
+    if (exprResultColumnNums.size() != 0) {
+      childVEs = new VectorExpression[exprResultColumnNums.size()];
+      for (int i = 0; i < childVEs.length; i++) {
+        childVEs[i] = vectorExprs.get(i);
+      }
+    }
+    ve.setChildExpressions(childVEs);
+
+    // Free output columns if inputs have non-leaf expression trees.
+    for (Integer i : exprResultColumnNums) {
+      ocm.freeOutputColumn(i);
+    }
+    return ve;
+  }
+
+  // Return true if this is any kind of float.
+  public static boolean isFloatFamily(String resultType) {
+    return resultType.equalsIgnoreCase("double")
+        || resultType.equalsIgnoreCase("float");
+  }
+
+  // Return true if this data type is handled in the output vector as an integer.
+  public static boolean isIntFamily(String resultType) {
+    return resultType.equalsIgnoreCase("tinyint")
+        || resultType.equalsIgnoreCase("smallint")
+        || resultType.equalsIgnoreCase("int")
+        || resultType.equalsIgnoreCase("bigint")
+        || resultType.equalsIgnoreCase("boolean");
+  }
+
+  /* Return a unary string vector expression. This is used for functions like
+   * UPPER() and LOWER().
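+   * For example, UPPER(col) reads a single input string column and writes the
+   * upper-cased result into an allocated output BytesColumnVector.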
*/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFAdaptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFAdaptor.java new file mode 100644 index 0000000..aa9b8e7 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFAdaptor.java @@ -0,0 +1,343 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.io.Text; + +import java.io.IOException; +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.ArrayList; + +/** + * A VectorUDFAdaptor is a vectorized expression for invoking a custom + * UDF on zero or more input vectors or constants which are the function arguments. 
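+ *
+ * A sketch of intended use, mirroring TestVectorUDFAdaptor (construction of
+ * funcDesc, argDescs, and the batch is elided):
+ *
+ *   VectorUDFAdaptor vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
+ *   vudf.evaluate(batch);   // results land in batch.cols[1]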
*/
+public class VectorUDFAdaptor extends VectorExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  private int outputColumn;
+  private String resultType;
+  private VectorUDFArgDesc[] argDescs;
+  private ExprNodeGenericFuncDesc expr;
+
+  private transient GenericUDF genericUDF;
+  private transient GenericUDF.DeferredObject[] deferredChildren;
+  private transient ObjectInspector outputOI;
+  private transient ObjectInspector[] childrenOIs;
+  private transient VectorExpressionWriter[] writers;
+
+  public VectorUDFAdaptor() {
+    super();
+  }
+
+  public VectorUDFAdaptor(
+      ExprNodeGenericFuncDesc expr,
+      int outputColumn,
+      String resultType,
+      VectorUDFArgDesc[] argDescs) throws HiveException {
+
+    this();
+    this.expr = expr;
+    this.outputColumn = outputColumn;
+    this.resultType = resultType;
+    this.argDescs = argDescs;
+  }
+
+  // Initialize transient fields. To be called after deserialization of other fields.
+  public void init() throws HiveException, UDFArgumentException {
+    genericUDF = expr.getGenericUDF();
+    deferredChildren = new GenericUDF.DeferredObject[expr.getChildExprs().size()];
+    childrenOIs = new ObjectInspector[expr.getChildExprs().size()];
+    writers = VectorExpressionWriterFactory.getExpressionWriters(expr.getChildExprs());
+    for (int i = 0; i < childrenOIs.length; i++) {
+      childrenOIs[i] = writers[i].getObjectInspector();
+    }
+    outputOI = VectorExpressionWriterFactory.genVectorExpressionWritable(expr)
+        .getObjectInspector();
+
+    genericUDF.initialize(childrenOIs);
+
+    // Initialize constant arguments
+    for (int i = 0; i < argDescs.length; i++) {
+      if (argDescs[i].isConstant()) {
+        argDescs[i].prepareConstant();
+      }
+    }
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (genericUDF == null) {
+      try {
+        init();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    int[] sel = batch.selected;
+    int n = batch.size;
+    ColumnVector outV = batch.cols[outputColumn];
+
+    // If the output column is of type string, initialize the buffer to receive data.
+    if (outV instanceof BytesColumnVector) {
+      ((BytesColumnVector) outV).initBuffer();
+    }
+
+    if (n == 0) {
+      // Nothing to do
+      return;
+    }
+
+    batch.cols[outputColumn].noNulls = true;
+
+    /* If all variable input columns are repeating, just evaluate the function
+     * for row 0 in the batch and mark the output as repeating.
+     */
+    if (allInputColsRepeating(batch)) {
+      setResult(0, batch);
+      batch.cols[outputColumn].isRepeating = true;
+      return;
+    } else {
+      batch.cols[outputColumn].isRepeating = false;
+    }
+
+    if (batch.selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        setResult(i, batch);
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        setResult(i, batch);
+      }
+    }
+  }
+
+  /* Return false if any variable (non-constant) input column is non-repeating,
+   * otherwise true. This returns false if all the arguments are constant or
+   * there are zero arguments.
+   *
+   * A possible future optimization is to set the output to isRepeating
+   * for cases of all-constant arguments for deterministic functions.
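+   *
+   * For example, for a call udf(col0) where col0's vector is marked isRepeating,
+   * evaluate() above calls setResult(0, batch) once and marks the output column
+   * repeating instead of looping over every row.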
*/
+  private boolean allInputColsRepeating(VectorizedRowBatch batch) {
+    int varArgCount = 0;
+    for (int i = 0; i < argDescs.length; i++) {
+      if (argDescs[i].isVariable()) {
+        if (!batch.cols[argDescs[i].getColumnNum()].isRepeating) {
+          return false;
+        }
+
+        // Count only variable arguments; constants place no constraint here.
+        varArgCount += 1;
+      }
+    }
+    return varArgCount > 0;
+  }
+
+  /* Calculate the function result for row i of the batch and
+   * set the output column vector entry i to the result.
+   */
+  private void setResult(int i, VectorizedRowBatch b) {
+
+    // get arguments
+    for (int j = 0; j < argDescs.length; j++) {
+      deferredChildren[j] = argDescs[j].getDeferredJavaObject(i, b, j, writers);
+    }
+
+    // call function
+    Object result;
+    try {
+      result = genericUDF.evaluate(deferredChildren);
+    } catch (HiveException e) {
+
+      /* For UDFs that expect primitive types (like int instead of Integer or IntWritable),
+       * this will catch the exception that happens if they are passed a NULL value.
+       * Then the default NULL handling logic will apply, and the result will be NULL.
+       */
+      result = null;
+    }
+
+    // set output column vector entry
+    if (result == null) {
+      b.cols[outputColumn].noNulls = false;
+      b.cols[outputColumn].isNull[i] = true;
+    } else {
+      b.cols[outputColumn].isNull[i] = false;
+      setOutputCol(b.cols[outputColumn], i, result);
+    }
+  }
+
+  private void setOutputCol(ColumnVector colVec, int i, Object value) {
+
+    /* Depending on the output type, get the value, cast the result to the
+     * correct type if needed, and assign the result into the output vector.
+     */
+    if (outputOI instanceof WritableStringObjectInspector) {
+      BytesColumnVector bv = (BytesColumnVector) colVec;
+      Text t;
+      if (value instanceof String) {
+        t = new Text((String) value);
+      } else {
+        t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value);
+      }
+      bv.setVal(i, t.getBytes(), 0, t.getLength());
+    } else if (outputOI instanceof WritableIntObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Integer) {
+        lv.vector[i] = (Integer) value;
+      } else {
+        lv.vector[i] = ((WritableIntObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableLongObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Long) {
+        lv.vector[i] = (Long) value;
+      } else {
+        lv.vector[i] = ((WritableLongObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableDoubleObjectInspector) {
+      DoubleColumnVector dv = (DoubleColumnVector) colVec;
+      if (value instanceof Double) {
+        dv.vector[i] = (Double) value;
+      } else {
+        dv.vector[i] = ((WritableDoubleObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableFloatObjectInspector) {
+      DoubleColumnVector dv = (DoubleColumnVector) colVec;
+      if (value instanceof Float) {
+        dv.vector[i] = (Float) value;
+      } else {
+        dv.vector[i] = ((WritableFloatObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableShortObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Short) {
+        lv.vector[i] = (Short) value;
+      } else {
+        lv.vector[i] = ((WritableShortObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof WritableByteObjectInspector) {
+      LongColumnVector lv = (LongColumnVector) colVec;
+      if (value instanceof Byte) {
+        lv.vector[i] = (Byte) value;
+      } else {
+        lv.vector[i] = ((WritableByteObjectInspector) outputOI).get(value);
+      }
+    } else if (outputOI instanceof
WritableTimestampObjectInspector) { + LongColumnVector lv = (LongColumnVector) colVec; + Timestamp ts; + if (value instanceof Timestamp) { + ts = (Timestamp) value; + } else { + ts = ((WritableTimestampObjectInspector) outputOI).getPrimitiveJavaObject(value); + } + /* Calculate the number of nanoseconds since the epoch as a long integer. By convention + * that is how Timestamp values are operated on in a vector. + */ + long l = ts.getTime() * 1000000 // Shift the milliseconds value over by 6 digits + // to scale for nanosecond precision. + // The milliseconds digits will by convention be all 0s. + + ts.getNanos() % 1000000; // Add on the remaining nanos. + // The % 1000000 operation removes the ms values + // so that the milliseconds are not counted twice. + lv.vector[i] = l; + } else if (outputOI instanceof WritableBooleanObjectInspector) { + LongColumnVector lv = (LongColumnVector) colVec; + if (value instanceof Boolean) { + lv.vector[i] = (Boolean) value ? 1 : 0; + } else { + lv.vector[i] = ((WritableBooleanObjectInspector) outputOI).get(value) ? 1 : 0; + } + } else { + throw new RuntimeException("Unhandled object type " + outputOI.getTypeName()); + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public String getOutputType() { + return resultType; + } + + public String getResultType() { + return resultType; + } + + public void setResultType(String resultType) { + this.resultType = resultType; + } + + public VectorUDFArgDesc[] getArgDescs() { + return argDescs; + } + + public void setArgDescs(VectorUDFArgDesc[] argDescs) { + this.argDescs = argDescs; + } + + public ExprNodeGenericFuncDesc getExpr() { + return expr; + } + + public void setExpr(ExprNodeGenericFuncDesc expr) { + this.expr = expr; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFArgDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFArgDesc.java new file mode 100644 index 0000000..48100b0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFArgDesc.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.io.IOException; +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; + +/** + * Descriptor for function argument. + */ +public class VectorUDFArgDesc implements Serializable { + + private static final long serialVersionUID = 1L; + + private boolean isConstant; + private int columnNum; + private transient GenericUDF.DeferredJavaObject constObjVal; + private ExprNodeConstantDesc constExpr; + + public VectorUDFArgDesc() { + } + + /** + * Set this argument to a constant value extracted from the + * expression tree. + */ + public void setConstant(ExprNodeConstantDesc expr) { + isConstant = true; + constExpr = expr; + } + + /* Prepare the constant for use when the function is called. To be used + * during initialization. + */ + public void prepareConstant() { + PrimitiveCategory pc = ((PrimitiveTypeInfo) constExpr.getTypeInfo()) + .getPrimitiveCategory(); + + // Convert from Java to Writable + Object writableValue = PrimitiveObjectInspectorFactory + .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject( + constExpr.getValue()); + + constObjVal = new GenericUDF.DeferredJavaObject(writableValue); + } + + /** + * Set this argument to be a "variable" one which is to be taken from + * a specified column vector number i. 
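+ * For example, argDescs[0].setVariable(0) binds the UDF's first argument to
+ * batch column 0; getDeferredJavaObject() then reads row values from that
+ * column at evaluate time.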
*/
+  public void setVariable(int i) {
+    columnNum = i;
+  }
+
+  public boolean isConstant() {
+    return isConstant;
+  }
+
+  public boolean isVariable() {
+    return !isConstant;
+  }
+
+  public int getColumn() {
+    return columnNum;
+  }
+
+  public DeferredObject getDeferredJavaObject(int row, VectorizedRowBatch b, int argPosition,
+      VectorExpressionWriter[] writers) {
+
+    if (isConstant()) {
+      return this.constObjVal;
+    } else {
+
+      // get column
+      ColumnVector cv = b.cols[columnNum];
+
+      // write value to object that can be inspected
+      Object o;
+      try {
+        o = writers[argPosition].writeValue(cv, row);
+        return new GenericUDF.DeferredJavaObject(o);
+      } catch (HiveException e) {
+        throw new RuntimeException("Unable to get Java object from VectorizedRowBatch", e);
+      }
+    }
+  }
+
+  public boolean getIsConstant() {
+    return isConstant;
+  }
+
+  public void setIsConstant(boolean isConstant) {
+    this.isConstant = isConstant;
+  }
+
+  public int getColumnNum() {
+    return columnNum;
+  }
+
+  public void setColumnNum(int columnNum) {
+    this.columnNum = columnNum;
+  }
+
+  public ExprNodeConstantDesc getConstExpr() {
+    return constExpr;
+  }
+
+  public void setConstExpr(ExprNodeConstantDesc constExpr) {
+    this.constExpr = constExpr;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 5b467bb..545b9f5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -461,7 +461,7 @@ private boolean validateExprNodeDesc(ExprNodeDesc desc) {
    }
    if (desc instanceof ExprNodeGenericFuncDesc) {
      ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
-      boolean r = validateGenericUdf(d.getGenericUDF());
+      boolean r = validateGenericUdf(d);
      if (!r) {
        return false;
      }
@@ -474,7 +474,11 @@ private boolean validateExprNodeDesc(ExprNodeDesc desc) {
    return true;
  }

-  private boolean validateGenericUdf(GenericUDF genericUDF) {
+  private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) {
+    if (VectorizationContext.isCustomUDF(genericUDFExpr)) {
+      return true;
+    }
+    GenericUDF genericUDF = genericUDFExpr.getGenericUDF();
    if (genericUDF instanceof GenericUDFBridge) {
      Class<? extends UDF> udf = ((GenericUDFBridge) genericUDF).getUdfClass();
      return supportedGenericUDFs.contains(udf);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 048824a..bcdb4c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -659,7 +659,7 @@ public static ExprNodeDesc getFuncExprNodeDesc(String udfName,
    List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(children.length);
    childrenList.addAll(Arrays.asList(children));
-    return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList);
+    return ExprNodeGenericFuncDesc.newInstance(genericUDF, udfName, childrenList);
  }

  static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
@@ -724,7 +724,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
        // Calculate TypeInfo
        TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo();
        desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
-            .getGenericUDFForIndex(), children);
+            .getGenericUDFForIndex(), funcText, children);
      } else if (myt.getCategory() == Category.MAP) {
        // Only allow constant map key for now
        if (!(children.get(1) instanceof ExprNodeConstantDesc)) {
@@ -740,7 +740,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
        // Calculate TypeInfo
        TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo();
        desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry
-            .getGenericUDFForIndex(), children);
+            .getGenericUDFForIndex(), funcText, children);
      } else {
        throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr,
            myt.getTypeName()));
@@ -861,7 +861,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
        }
      }

-      desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), children);
+      desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), funcText, children);
    }
    // UDFOPPositive is a no-op.
    // However, we still create it, and then remove it here, to make sure we
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
index 52fe47b..e3fcaf6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java
@@ -62,6 +62,7 @@
  */
  private GenericUDF genericUDF;
  private List<ExprNodeDesc> childExprs;
+  private transient String funcText;
  /**
   * This class uses a writableObjectInspector rather than a TypeInfo to store
   * the canonical type information for this NodeDesc.
@@ -73,13 +74,19 @@
  public ExprNodeGenericFuncDesc() {
  }

+  /* If the function has an explicit name like func(args) then call a
+   * constructor that explicitly provides the function name in the
+   * funcText argument.
+   */
  public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+      String funcText,
      List<ExprNodeDesc> children) {
    this(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo),
-        genericUDF, children);
+        genericUDF, funcText, children);
  }

  public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+      String funcText,
      List<ExprNodeDesc> children) {
    super(TypeInfoUtils.getTypeInfoFromObjectInspector(oi));
    this.writableObjectInspector =
@@ -87,6 +94,18 @@ public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
    assert (genericUDF != null);
    this.genericUDF = genericUDF;
    this.childExprs = children;
+    this.funcText = funcText;
+  }
+
+  // Backward-compatibility interfaces for functions without a user-visible name.
+  public ExprNodeGenericFuncDesc(TypeInfo typeInfo, GenericUDF genericUDF,
+      List<ExprNodeDesc> children) {
+    this(typeInfo, genericUDF, null, children);
+  }
+
+  public ExprNodeGenericFuncDesc(ObjectInspector oi, GenericUDF genericUDF,
+      List<ExprNodeDesc> children) {
+    this(oi, genericUDF, null, children);
  }

  @Override
@@ -165,17 +184,20 @@ public ExprNodeDesc clone() {
      cloneCh.add(ch.clone());
    }
    ExprNodeGenericFuncDesc clone = new ExprNodeGenericFuncDesc(typeInfo,
-        FunctionRegistry.cloneGenericUDF(genericUDF), cloneCh);
+        FunctionRegistry.cloneGenericUDF(genericUDF), funcText, cloneCh);
    return clone;
  }

  /**
-   * Create a exprNodeGenericFuncDesc based on the genericUDFClass and the
-   * children parameters.
+   * Create an ExprNodeGenericFuncDesc based on the genericUDFClass and the
+   * children parameters. If the function has an explicit name, the
+   * newInstance method should be passed the function name in the funcText
+   * argument.
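+   *
+   * For example, the planner now creates function expressions as
+   *
+   *   desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), funcText, children);
+   *
+   * so that getFuncText() can later report the name the user actually typed.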
* * @throws UDFArgumentException */ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, + String funcText, List children) throws UDFArgumentException { ObjectInspector[] childrenOIs = new ObjectInspector[children.size()]; for (int i = 0; i < childrenOIs.length; i++) { @@ -232,7 +254,15 @@ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, } } - return new ExprNodeGenericFuncDesc(oi, genericUDF, children); + return new ExprNodeGenericFuncDesc(oi, genericUDF, funcText, children); + } + + /* Backward-compatibility interface for the case where there is no explicit + * name for the function. + */ + public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, + List children) throws UDFArgumentException { + return newInstance(genericUDF, null, children); } @Override @@ -285,4 +315,8 @@ public boolean isSortedExpr() { public void setSortedExpr(boolean isSortedExpr) { this.isSortedExpr = isSortedExpr; } + + public String getFuncText() { + return this.funcText; + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFAdaptor.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFAdaptor.java new file mode 100644 index 0000000..22fb1e9 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFAdaptor.java @@ -0,0 +1,304 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFAdaptor;
+import org.apache.hadoop.hive.ql.exec.vector.util.GenericUDFIsNull;
+import org.apache.hadoop.hive.ql.exec.vector.util.LongUDF;
+import org.apache.hadoop.hive.ql.exec.vector.util.ConcatTextLongDoubleUDF;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.junit.Test;
+
+
+public class TestVectorUDFAdaptor {
+
+  static byte[] blue = null;
+  static byte[] red = null;
+
+  static {
+    try {
+      blue = "blue".getBytes("UTF-8");
+      red = "red".getBytes("UTF-8");
+    } catch (Exception e) {
+      ; // do nothing
+    }
+  }
+
+  @Test
+  public void testLongUDF() {
+
+    // create a syntax tree for a simple function call "longudf(col0)"
+    ExprNodeGenericFuncDesc funcDesc;
+    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
+    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false,
+        LongUDF.class.getName());
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    ExprNodeColumnDesc colDesc
+        = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
+    children.add(colDesc);
+    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
+    argDescs[0] = new VectorUDFArgDesc();
+    argDescs[0].setVariable(0);
+    funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge,
+        genericUDFBridge.getUdfName(), children);
+
+    // create the adaptor for this function call to work in vector mode
+    VectorUDFAdaptor vudf = null;
+    try {
+      vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
+    } catch (HiveException e) {
+
+      // We should never get here.
+      assertTrue(false);
+    }
+
+    VectorizedRowBatch b = getBatchLongInLongOut();
+    vudf.evaluate(b);
+
+    // verify output
+    LongColumnVector out = (LongColumnVector) b.cols[1];
+    assertEquals(1000, out.vector[0]);
+    assertEquals(1001, out.vector[1]);
+    assertEquals(1002, out.vector[2]);
+    assertTrue(out.noNulls);
+    assertFalse(out.isRepeating);
+
+    // with nulls
+    b = getBatchLongInLongOut();
+    out = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = false;
+    vudf.evaluate(b);
+    assertFalse(out.noNulls);
+    assertEquals(1000, out.vector[0]);
+    assertEquals(1001, out.vector[1]);
+    assertTrue(out.isNull[2]);
+    assertFalse(out.isRepeating);
+
+    // with repeating
+    b = getBatchLongInLongOut();
+    out = (LongColumnVector) b.cols[1];
+    b.cols[0].isRepeating = true;
+    vudf.evaluate(b);
+
+    // The adaptor may or may not mark the output column isRepeating;
+    // that is implementation-defined.
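+    // Accept either behavior: a repeating output whose row 0 holds the
+    // expected value, or a non-repeating output where the last row does.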
+ assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000 + || !b.cols[1].isRepeating && out.vector[2] == 1000); + assertEquals(3, b.size); + } + + @Test + public void testMultiArgumentUDF() { + + // create a syntax tree for a function call "testudf(col0, col1, col2)" + ExprNodeGenericFuncDesc funcDesc; + TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo; + TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo; + TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo; + GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false, + ConcatTextLongDoubleUDF.class.getName()); + List children = new ArrayList(); + children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false)); + children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false)); + children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false)); + + VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3]; + for (int i = 0; i < 3; i++) { + argDescs[i] = new VectorUDFArgDesc(); + argDescs[i].setVariable(i); + } + funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge, + genericUDFBridge.getUdfName(), children); + + // create the adaptor for this function call to work in vector mode + VectorUDFAdaptor vudf = null; + try { + vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs); + } catch (HiveException e) { + + // We should never get here. + assertTrue(false); + throw new RuntimeException(e); + } + + // with no nulls + VectorizedRowBatch b = getBatchStrDblLongWithStrOut(); + vudf.evaluate(b); + byte[] result = null; + byte[] result2 = null; + try { + result = "red:1:1.0".getBytes("UTF-8"); + result2 = "blue:0:0.0".getBytes("UTF-8"); + } catch (Exception e) { + ; + } + BytesColumnVector out = (BytesColumnVector) b.cols[3]; + int cmp = StringExpr.compare(result, 0, result.length, out.vector[1], + out.start[1], out.length[1]); + assertEquals(0, cmp); + assertTrue(out.noNulls); + + // with nulls + b = getBatchStrDblLongWithStrOut(); + b.cols[1].noNulls = false; + vudf.evaluate(b); + out = (BytesColumnVector) b.cols[3]; + assertFalse(out.noNulls); + assertTrue(out.isNull[1]); + + // with all input columns repeating + b = getBatchStrDblLongWithStrOut(); + b.cols[0].isRepeating = true; + b.cols[1].isRepeating = true; + b.cols[2].isRepeating = true; + vudf.evaluate(b); + + out = (BytesColumnVector) b.cols[3]; + assertTrue(out.isRepeating); + cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0], + out.start[0], out.length[0]); + assertEquals(0, cmp); + assertTrue(out.noNulls); + } + + private VectorizedRowBatch getBatchLongInLongOut() { + VectorizedRowBatch b = new VectorizedRowBatch(2); + LongColumnVector in = new LongColumnVector(); + LongColumnVector out = new LongColumnVector(); + b.cols[0] = in; + b.cols[1] = out; + in.vector[0] = 0; + in.vector[1] = 1; + in.vector[2] = 2; + in.isNull[2] = true; + in.noNulls = true; + b.size = 3; + return b; + } + + private VectorizedRowBatch getBatchStrDblLongWithStrOut() { + VectorizedRowBatch b = new VectorizedRowBatch(4); + BytesColumnVector strCol = new BytesColumnVector(); + LongColumnVector longCol = new LongColumnVector(); + DoubleColumnVector dblCol = new DoubleColumnVector(); + BytesColumnVector outCol = new BytesColumnVector(); + b.cols[0] = strCol; + b.cols[1] = longCol; + b.cols[2] = dblCol; + b.cols[3] = outCol; + + strCol.initBuffer(); + strCol.setVal(0, blue, 0, blue.length); + strCol.setVal(1, red, 0, red.length); + longCol.vector[0] = 0; + longCol.vector[1] = 1; + dblCol.vector[0] = 0.0; + 
dblCol.vector[1] = 1.0; + + // set one null value for possible later use + longCol.isNull[1] = true; + + // but have no nulls initially + longCol.noNulls = true; + strCol.noNulls = true; + dblCol.noNulls = true; + outCol.initBuffer(); + b.size = 2; + return b; + } + + + // test the UDF adaptor for a generic UDF (as opposed to a legacy UDF) + @Test + public void testGenericUDF() { + + // create a syntax tree for a function call 'myisnull(col0, "UNKNOWN")' + ExprNodeGenericFuncDesc funcDesc; + GenericUDF genericUDF = new GenericUDFIsNull(); + TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo; + + List children = new ArrayList(); + children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false)); + children.add(new ExprNodeConstantDesc(typeInfoStr, "UNKNOWN")); + + VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[2]; + for (int i = 0; i < 2; i++) { + argDescs[i] = new VectorUDFArgDesc(); + } + argDescs[0].setVariable(0); + argDescs[1].setConstant((ExprNodeConstantDesc) children.get(1)); + funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDF, "myisnull", children); + + // create the adaptor for this function call to work in vector mode + VectorUDFAdaptor vudf = null; + try { + vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs); + } catch (HiveException e) { + + // We should never get here. + assertTrue(false); + } + + VectorizedRowBatch b; + + byte[] red = null; + byte[] unknown = null; + try { + red = "red".getBytes("UTF-8"); + unknown = "UNKNOWN".getBytes("UTF-8"); + } catch (Exception e) { + ; + } + BytesColumnVector out; + + // with nulls + b = getBatchStrDblLongWithStrOut(); + b.cols[0].noNulls = false; + b.cols[0].isNull[0] = true; // set 1st entry to null + vudf.evaluate(b); + out = (BytesColumnVector) b.cols[3]; + + // verify outputs + int cmp = StringExpr.compare(red, 0, red.length, + out.vector[1], out.start[1], out.length[1]); + assertEquals(0, cmp); + cmp = StringExpr.compare(unknown, 0, unknown.length, + out.vector[0], out.start[0], out.length[0]); + assertEquals(0, cmp); + + // output entry should not be null for null input for this particular generic UDF + assertTrue(out.noNulls || !out.isNull[0]); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/ConcatTextLongDoubleUDF.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/ConcatTextLongDoubleUDF.java new file mode 100644 index 0000000..c2288a5 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/ConcatTextLongDoubleUDF.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.util; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.io.Text; + +@Description( + name = "testudf", + value = "_FUNC_(str) - combines arguments to output string", + extended = "Example:\n" + + " > SELECT testudf(name, dob, salary) FROM employee;\n" + + " Jack" + ) + +/* This is a test function that takes three different kinds + * of arguments, for use to verify vectorized UDF invocation. + */ +public class ConcatTextLongDoubleUDF extends UDF { + public Text evaluate(Text s, Long i, Double d) { + + if (s == null + || i == null + || d == null) { + return null; + } + StringBuilder sb = new StringBuilder(); + sb.append(s.toString()); + sb.append(":"); + sb.append(i); + sb.append(":"); + sb.append(d); + return new Text(sb.toString()); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/GenericUDFIsNull.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/GenericUDFIsNull.java new file mode 100644 index 0000000..05a1634 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/GenericUDFIsNull.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.util; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +@Description(name = "myisnull", +value = "_FUNC_(value,default_value) - Returns default value if value is null else returns value", +extended = "Example:\n" ++ " > SELECT _FUNC_(null,'bla') FROM src LIMIT 1;\n" + " bla") +/* + * This is a copy of GenericUDFNvl, which is built-in. We'll make it a generic + * custom UDF for test purposes. 
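+ * For example, myisnull(col0, 'UNKNOWN') returns 'UNKNOWN' for rows where col0
+ * is NULL and the value of col0 otherwise; testGenericUDF() verifies exactly
+ * that behavior through the adaptor.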
*/
+public class GenericUDFIsNull extends GenericUDF {
+  private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
+  private transient ObjectInspector[] argumentOIs;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    argumentOIs = arguments;
+    if (arguments.length != 2) {
+      throw new UDFArgumentLengthException(
+          "The operator 'MYISNULL' accepts 2 arguments.");
+    }
+    returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+    if (!(returnOIResolver.update(arguments[0]) && returnOIResolver
+        .update(arguments[1]))) {
+      throw new UDFArgumentTypeException(2,
+          "The first and second arguments of function MYISNULL should have the same type, "
+          + "but they are different: \"" + arguments[0].getTypeName()
+          + "\" and \"" + arguments[1].getTypeName() + "\"");
+    }
+    return returnOIResolver.get();
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object retVal = returnOIResolver.convertIfNecessary(arguments[0].get(),
+        argumentOIs[0]);
+    if (retVal == null) {
+      retVal = returnOIResolver.convertIfNecessary(arguments[1].get(),
+          argumentOIs[1]);
+    }
+    return retVal;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("if ");
+    sb.append(children[0]);
+    sb.append(" is null ");
+    sb.append("returns ");
+    sb.append(children[1]);
+    return sb.toString();
+  }
+
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/LongUDF.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/LongUDF.java
new file mode 100644
index 0000000..23e80e5
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/LongUDF.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.util;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.LongWritable;
+
+// A UDF like one a user would create, implementing the UDF interface.
+
+@Description(
+    name = "longudf",
+    value = "_FUNC_(arg) - returns arg + 1000",
+    extended = "Example:\n"
+    + "  > SELECT longudf(eno) FROM employee;\n"
+  )
+
+public class LongUDF extends UDF {
+  public LongWritable evaluate(LongWritable i) {
+    if (i == null) {
+      return null;
+    }
+    return new LongWritable(i.get() + 1000);
+  }
+}
\ No newline at end of file
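For reference, a sketch of how this path is exercised end to end once the patch is applied, using the hypothetical table and column from the LongUDF example above:

  CREATE TEMPORARY FUNCTION longudf AS 'org.apache.hadoop.hive.ql.exec.vector.util.LongUDF';
  SET hive.vectorized.execution.enabled = true;
  SELECT longudf(eno) FROM employee;

FunctionRegistry reports a function registered this way as non-native, so VectorizationContext.isCustomUDF() returns true, the Vectorizer accepts the expression, and getCustomUDFExpression() wraps the call in a VectorUDFAdaptor at plan time.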