Index: contrib/src/test/results/clientnegative/invalid_row_sequence.q.out =================================================================== --- contrib/src/test/results/clientnegative/invalid_row_sequence.q.out (revision 0) +++ contrib/src/test/results/clientnegative/invalid_row_sequence.q.out (revision 0) @@ -0,0 +1,15 @@ +PREHOOK: query: -- Verify that a stateful UDF cannot be used outside of the SELECT list + +drop temporary function row_sequence +PREHOOK: type: DROPFUNCTION +POSTHOOK: query: -- Verify that a stateful UDF cannot be used outside of the SELECT list + +drop temporary function row_sequence +POSTHOOK: type: DROPFUNCTION +PREHOOK: query: create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence' +PREHOOK: type: CREATEFUNCTION +POSTHOOK: query: create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence' +POSTHOOK: type: CREATEFUNCTION +FAILED: Error in semantic analysis: Stateful UDF's can only be invoked in the SELECT list Index: contrib/src/test/results/clientnegative/case_with_row_sequence.q.out =================================================================== --- contrib/src/test/results/clientnegative/case_with_row_sequence.q.out (revision 0) +++ contrib/src/test/results/clientnegative/case_with_row_sequence.q.out (revision 0) @@ -0,0 +1,18 @@ +PREHOOK: query: drop temporary function row_sequence +PREHOOK: type: DROPFUNCTION +POSTHOOK: query: drop temporary function row_sequence +POSTHOOK: type: DROPFUNCTION +PREHOOK: query: create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence' +PREHOOK: type: CREATEFUNCTION +POSTHOOK: query: create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence' +POSTHOOK: type: CREATEFUNCTION +PREHOOK: query: -- make sure a stateful function inside of CASE throws an exception +-- since the short-circuiting requirements are contradictory +SELECT CASE WHEN 3 > 2 THEN 10 WHEN row_sequence() > 5 THEN 20 ELSE 30 END +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_23-14-39_576_5692614556807208481/-mr-10000 +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask Index: contrib/src/test/results/clientpositive/udf_row_sequence.q.out =================================================================== --- contrib/src/test/results/clientpositive/udf_row_sequence.q.out (revision 1073978) +++ contrib/src/test/results/clientpositive/udf_row_sequence.q.out (working copy) @@ -79,7 +79,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/sdong/hive_2011-02-16_20-11-16_760_3970553477134855753/-mr-10002 + file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-26-27_614_6714509994567553903/-mr-10002 Reduce Output Operator key expressions: expr: _col1 @@ -110,13 +110,13 @@ order by r PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-16_20-11-16_840_7492426874452808984/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-26-27_973_4395636849347592535/-mr-10000 POSTHOOK: query: select key, row_sequence() as r from (select key from src order by key) x order by r POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-16_20-11-16_840_7492426874452808984/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-26-27_973_4395636849347592535/-mr-10000 0 1 0 2 0 3 @@ -617,6 +617,42 @@ 97 498 98 499 98 500 +PREHOOK: query: -- make sure stateful functions do not get short-circuited away +-- a true result for key=105 would indicate undesired short-circuiting +select key, (key = 105) and (row_sequence() = 1) +from (select key from src order by key) x +order by key limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-26-43_815_3305291736093408641/-mr-10000 +POSTHOOK: query: -- make sure stateful functions do not get short-circuited away +-- a true result for key=105 would indicate undesired short-circuiting +select key, (key = 105) and (row_sequence() = 1) +from (select key from src order by key) x +order by key limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-26-43_815_3305291736093408641/-mr-10000 +0 false +0 false +0 false +10 false +100 false +100 false +103 false +103 false +104 false +104 false +105 false +11 false +111 false +113 false +113 false +114 false +116 false +118 false +118 false +119 false PREHOOK: query: drop temporary function row_sequence PREHOOK: type: DROPFUNCTION POSTHOOK: query: drop temporary function row_sequence Index: contrib/src/test/queries/clientnegative/invalid_row_sequence.q =================================================================== --- contrib/src/test/queries/clientnegative/invalid_row_sequence.q (revision 0) +++ contrib/src/test/queries/clientnegative/invalid_row_sequence.q (revision 0) @@ -0,0 +1,13 @@ +-- Verify that a stateful UDF cannot be used outside of the SELECT list + +drop temporary function row_sequence; + +add jar ${system:build.dir}/hive-contrib-${system:hive.version}.jar; + +create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence'; + +select key +from (select key from src order by key) x +where row_sequence() < 5 +order by key; Index: contrib/src/test/queries/clientnegative/case_with_row_sequence.q =================================================================== --- contrib/src/test/queries/clientnegative/case_with_row_sequence.q (revision 0) +++ contrib/src/test/queries/clientnegative/case_with_row_sequence.q (revision 0) @@ -0,0 +1,10 @@ +drop temporary function row_sequence; + +add jar ${system:build.dir}/hive-contrib-${system:hive.version}.jar; +create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence'; + +-- make sure a stateful function inside of CASE throws an exception +-- since the short-circuiting requirements are contradictory +SELECT CASE WHEN 3 > 2 THEN 10 WHEN row_sequence() > 5 THEN 20 ELSE 30 END +FROM src LIMIT 1; Index: contrib/src/test/queries/clientpositive/udf_row_sequence.q =================================================================== --- contrib/src/test/queries/clientpositive/udf_row_sequence.q (revision 1073978) +++ contrib/src/test/queries/clientpositive/udf_row_sequence.q (working copy) @@ -22,4 +22,10 @@ from (select key from src order by key) x order by r; +-- make sure stateful functions do not get short-circuited away +-- a true result for key=105 would indicate undesired short-circuiting +select key, (key = 105) and (row_sequence() = 1) +from (select key from src order by key) x +order by key limit 20; + drop temporary function row_sequence; Index: contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFRowSequence.java =================================================================== --- contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFRowSequence.java (revision 1073978) +++ contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFRowSequence.java (working copy) @@ -28,7 +28,7 @@ */ @Description(name = "row_sequence", value = "_FUNC_() - Returns a generated row sequence number starting from 1") -@UDFType(deterministic = false) +@UDFType(deterministic = false, stateful = true) public class UDFRowSequence extends UDF { private LongWritable result = new LongWritable(); Index: ql/src/test/results/clientpositive/udf_case.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_case.q.out (revision 1073978) +++ ql/src/test/results/clientpositive/udf_case.q.out (working copy) @@ -134,7 +134,7 @@ FROM src LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-31-04_518_6857103604327967113/-mr-10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-51-38_651_2224865717899515186/-mr-10000 POSTHOOK: query: SELECT CASE 1 WHEN 1 THEN 2 WHEN 3 THEN 4 @@ -163,5 +163,22 @@ FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-31-04_518_6857103604327967113/-mr-10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-51-38_651_2224865717899515186/-mr-10000 2 5 15 NULL 20 24 +PREHOOK: query: -- verify that short-circuiting is working correctly for CASE +-- we should never get to the ELSE branch, which would raise an exception +SELECT CASE 1 WHEN 1 THEN 'yo' +ELSE reflect('java.lang.String', 'bogus', 1) END +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-51-44_358_6447239253045741230/-mr-10000 +POSTHOOK: query: -- verify that short-circuiting is working correctly for CASE +-- we should never get to the ELSE branch, which would raise an exception +SELECT CASE 1 WHEN 1 THEN 'yo' +ELSE reflect('java.lang.String', 'bogus', 1) END +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-22_22-51-44_358_6447239253045741230/-mr-10000 +yo Index: ql/src/test/queries/clientpositive/udf_case.q =================================================================== --- ql/src/test/queries/clientpositive/udf_case.q (revision 1073978) +++ ql/src/test/queries/clientpositive/udf_case.q (working copy) @@ -55,3 +55,9 @@ WHEN 21 THEN 24 END FROM src LIMIT 1; + +-- verify that short-circuiting is working correctly for CASE +-- we should never get to the ELSE branch, which would raise an exception +SELECT CASE 1 WHEN 1 THEN 'yo' +ELSE reflect('java.lang.String', 'bogus', 1) END +FROM src LIMIT 1; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -1005,6 +1005,11 @@ * Returns whether a GenericUDF is deterministic or not. */ public static boolean isDeterministic(GenericUDF genericUDF) { + if (isStateful(genericUDF)) { + // stateful implies non-deterministic, regardless of whatever + // the deterministic annotation declares + return false; + } UDFType genericUDFType = genericUDF.getClass().getAnnotation(UDFType.class); if (genericUDFType != null && genericUDFType.deterministic() == false) { return false; @@ -1022,6 +1027,26 @@ } /** + * Returns whether a GenericUDF is stateful or not. + */ + public static boolean isStateful(GenericUDF genericUDF) { + UDFType genericUDFType = genericUDF.getClass().getAnnotation(UDFType.class); + if (genericUDFType != null && genericUDFType.stateful()) { + return true; + } + + if (genericUDF instanceof GenericUDFBridge) { + GenericUDFBridge bridge = (GenericUDFBridge) genericUDF; + UDFType bridgeUDFType = bridge.getUdfClass().getAnnotation(UDFType.class); + if (bridgeUDFType != null && bridgeUDFType.stateful()) { + return true; + } + } + + return false; + } + + /** * Returns whether the exprNodeDesc is a node of "and", "or", "not". */ public static boolean isOpAndOrNot(ExprNodeDesc desc) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java (working copy) @@ -21,8 +21,11 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFCase; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; /** @@ -39,7 +42,8 @@ transient GenericUDF genericUDF; transient Object rowObject; transient ExprNodeEvaluator[] children; - transient DeferredExprObject[] deferredChildren; + transient GenericUDF.DeferredObject[] deferredChildren; + transient boolean isEager; /** * Class to allow deferred evaluation for GenericUDF. @@ -55,17 +59,60 @@ public Object get() throws HiveException { return eval.evaluate(rowObject); } - }; + } + /** + * Class to force eager evaluation for GenericUDF in cases where + * it is warranted. + */ + class EagerExprObject implements GenericUDF.DeferredObject { + + ExprNodeEvaluator eval; + + transient Object obj; + + EagerExprObject(ExprNodeEvaluator eval) { + this.eval = eval; + } + + void evaluate() throws HiveException { + obj = eval.evaluate(rowObject); + } + + public Object get() throws HiveException { + return obj; + } + } + public ExprNodeGenericFuncEvaluator(ExprNodeGenericFuncDesc expr) { this.expr = expr; children = new ExprNodeEvaluator[expr.getChildExprs().size()]; + isEager = false; for (int i = 0; i < children.length; i++) { - children[i] = ExprNodeEvaluatorFactory.get(expr.getChildExprs().get(i)); + ExprNodeDesc child = expr.getChildExprs().get(i); + ExprNodeEvaluator nodeEvaluator = ExprNodeEvaluatorFactory.get(child); + children[i] = nodeEvaluator; + // If we have eager evaluators anywhere below us, then we are eager too. + if (nodeEvaluator instanceof ExprNodeGenericFuncEvaluator) { + if (((ExprNodeGenericFuncEvaluator) nodeEvaluator).isEager) { + isEager = true; + } + // Base case: we are eager if a child is stateful + GenericUDF childUDF = + ((ExprNodeGenericFuncDesc) child).getGenericUDF(); + if (FunctionRegistry.isStateful(childUDF)) { + isEager = true; + } + } } - deferredChildren = new DeferredExprObject[expr.getChildExprs().size()]; + deferredChildren = + new GenericUDF.DeferredObject[expr.getChildExprs().size()]; for (int i = 0; i < deferredChildren.length; i++) { - deferredChildren[i] = new DeferredExprObject(children[i]); + if (isEager) { + deferredChildren[i] = new EagerExprObject(children[i]); + } else { + deferredChildren[i] = new DeferredExprObject(children[i]); + } } } @@ -77,12 +124,23 @@ childrenOIs[i] = children[i].initialize(rowInspector); } genericUDF = expr.getGenericUDF(); + if (isEager && + ((genericUDF instanceof GenericUDFCase) + || (genericUDF instanceof GenericUDFWhen))) { + throw new HiveException( + "Stateful expressions cannot be used inside of CASE"); + } return genericUDF.initialize(childrenOIs); } @Override public Object evaluate(Object row) throws HiveException { rowObject = row; + if (isEager) { + for (int i = 0; i < deferredChildren.length; i++) { + ((EagerExprObject) deferredChildren[i]).evaluate(); + } + } return genericUDF.evaluate(deferredChildren); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (working copy) @@ -630,6 +630,12 @@ if (fi.getGenericUDTF() != null) { throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); } + if (!ctx.getAllowStatefulFunctions() && (fi.getGenericUDF() != null)) { + if (FunctionRegistry.isStateful(fi.getGenericUDF())) { + throw new SemanticException( + ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); + } + } desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), children); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (working copy) @@ -131,6 +131,7 @@ UDTF_LATERAL_VIEW("UDTF's cannot be in a select expression when there is a lateral view"), UDTF_ALIAS_MISMATCH("The number of aliases supplied in the AS clause does not match the " + "number of columns output by the UDTF"), + UDF_STATEFUL_INVALID_LOCATION("Stateful UDF's can only be invoked in the SELECT list"), LATERAL_VIEW_WITH_JOIN("Join with a lateral view is not supported"), LATERAL_VIEW_INVALID_CHILD("Lateral view AST with invalid child"), OUTPUT_SPECIFIED_MULTIPLE_TIMES("The same output cannot be present multiple times: "), Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -2050,7 +2050,10 @@ qb.getAliases()); } else { // Case when this is an expression - ExprNodeDesc exp = genExprNodeDesc(expr, inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); col_list.add(exp); if (!StringUtils.isEmpty(alias) && (out_rwsch.get(null, colAlias) != null)) { @@ -6728,9 +6731,31 @@ * @return exprNodeDesc * @throws SemanticException */ - @SuppressWarnings("nls") public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) throws SemanticException { + // Since the user didn't supply a customized type-checking context, + // use default settings. + TypeCheckCtx tcCtx = new TypeCheckCtx(input); + return genExprNodeDesc(expr, input, tcCtx); + } + + /** + * Generates an expression node descriptor for the expression passed in the + * arguments. This function uses the row resolver and the metadata information + * that are passed as arguments to resolve the column names to internal names. + * + * @param expr + * The expression + * @param input + * The row resolver + * @param tcCtx + * Customized type-checking context + * @return exprNodeDesc + * @throws SemanticException + */ + @SuppressWarnings("nls") + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { // We recursively create the exprNodeDesc. Base cases: when we encounter // a column ref, we convert that into an exprNodeColumnDesc; when we // encounter @@ -6750,8 +6775,7 @@ .getIsVirtualCol()); } - // Create the walker, the rules dispatcher and the context. - TypeCheckCtx tcCtx = new TypeCheckCtx(input); + // Create the walker and the rules dispatcher. tcCtx.setUnparseTranslator(unparseTranslator); HashMap nodeOutputs = Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (working copy) @@ -48,6 +48,11 @@ private ASTNode errorSrcNode; /** + * Whether to allow stateful UDF invocations. + */ + private boolean allowStatefulFunctions; + + /** * Constructor. * * @param inputRR @@ -56,6 +61,7 @@ public TypeCheckCtx(RowResolver inputRR) { setInputRR(inputRR); error = null; + allowStatefulFunctions = false; } /** @@ -89,6 +95,20 @@ } /** + * @param allowStatefulFunctions whether to allow stateful UDF invocations + */ + public void setAllowStatefulFunctions(boolean allowStatefulFunctions) { + this.allowStatefulFunctions = allowStatefulFunctions; + } + + /** + * @return whether to allow stateful UDF invocations + */ + public boolean getAllowStatefulFunctions() { + return allowStatefulFunctions; + } + + /** * @param error * the error to set * Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (revision 1073978) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (working copy) @@ -32,4 +32,5 @@ @Inherited public @interface UDFType { boolean deterministic() default true; + boolean stateful() default false; }