Index: contrib/src/test/results/clientnegative/invalid_row_sequence.q.out =================================================================== --- contrib/src/test/results/clientnegative/invalid_row_sequence.q.out (revision 0) +++ contrib/src/test/results/clientnegative/invalid_row_sequence.q.out (revision 0) @@ -0,0 +1,15 @@ +PREHOOK: query: -- Verify that a stateful UDF cannot be used outside of the SELECT list + +drop temporary function row_sequence +PREHOOK: type: DROPFUNCTION +POSTHOOK: query: -- Verify that a stateful UDF cannot be used outside of the SELECT list + +drop temporary function row_sequence +POSTHOOK: type: DROPFUNCTION +PREHOOK: query: create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence' +PREHOOK: type: CREATEFUNCTION +POSTHOOK: query: create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence' +POSTHOOK: type: CREATEFUNCTION +FAILED: Error in semantic analysis: Stateful UDF's can only be invoked in the SELECT list Index: contrib/src/test/results/clientpositive/udf_row_sequence.q.out =================================================================== --- contrib/src/test/results/clientpositive/udf_row_sequence.q.out (revision 1071012) +++ contrib/src/test/results/clientpositive/udf_row_sequence.q.out (working copy) @@ -79,7 +79,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2010-06-22_17-23-44_212_4881494104125773465/10002 + file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-15_19-14-13_288_8320801401920462216/-mr-10002 Reduce Output Operator key expressions: expr: _col1 @@ -110,13 +110,13 @@ order by r PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2010-06-22_17-23-44_546_5387418396452836474/10000 +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-15_19-14-13_525_5383790822499413811/-mr-10000 POSTHOOK: query: select key, row_sequence() as r from (select key from src order by key) x order by r POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2010-06-22_17-23-44_546_5387418396452836474/10000 +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-15_19-14-13_525_5383790822499413811/-mr-10000 0 1 0 2 0 3 @@ -617,6 +617,42 @@ 97 498 98 499 98 500 +PREHOOK: query: -- make sure stateful functions do not get short-circuited away +-- a true result for key=105 would indicate undesired short-circuiting +select key, (key = 105) and (row_sequence() = 1) +from (select key from src order by key) x +order by key limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-15_19-14-27_281_5571644795410439851/-mr-10000 +POSTHOOK: query: -- make sure stateful functions do not get short-circuited away +-- a true result for key=105 would indicate undesired short-circuiting +select key, (key = 105) and (row_sequence() = 1) +from (select key from src order by key) x +order by key limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/7P/7PeC14kXFIWq0PIYyexGbmKuXUk/-Tmp-/jsichi/hive_2011-02-15_19-14-27_281_5571644795410439851/-mr-10000 +0 false +0 false +0 false +10 false +100 false +100 false +103 false +103 false +104 false +104 false +105 true +11 false +111 false +113 false +113 false +114 false +116 false +118 false +118 false +119 false PREHOOK: query: drop temporary function row_sequence PREHOOK: type: DROPFUNCTION POSTHOOK: query: drop temporary function row_sequence Index: contrib/src/test/queries/clientnegative/invalid_row_sequence.q =================================================================== --- contrib/src/test/queries/clientnegative/invalid_row_sequence.q (revision 0) +++ contrib/src/test/queries/clientnegative/invalid_row_sequence.q (revision 0) @@ -0,0 +1,13 @@ +-- Verify that a stateful UDF cannot be used outside of the SELECT list + +drop temporary function row_sequence; + +add jar ${system:build.dir}/hive-contrib-${system:hive.version}.jar; + +create temporary function row_sequence as +'org.apache.hadoop.hive.contrib.udf.UDFRowSequence'; + +select key +from (select key from src order by key) x +where row_sequence() < 5 +order by key; Index: contrib/src/test/queries/clientpositive/udf_row_sequence.q =================================================================== --- contrib/src/test/queries/clientpositive/udf_row_sequence.q (revision 1071012) +++ contrib/src/test/queries/clientpositive/udf_row_sequence.q (working copy) @@ -22,4 +22,10 @@ from (select key from src order by key) x order by r; +-- make sure stateful functions do not get short-circuited away +-- a true result for key=105 would indicate undesired short-circuiting +select key, (key = 105) and (row_sequence() = 1) +from (select key from src order by key) x +order by key limit 20; + drop temporary function row_sequence; Index: contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFRowSequence.java =================================================================== --- contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFRowSequence.java (revision 1071012) +++ contrib/src/java/org/apache/hadoop/hive/contrib/udf/UDFRowSequence.java (working copy) @@ -28,7 +28,7 @@ */ @Description(name = "row_sequence", value = "_FUNC_() - Returns a generated row sequence number starting from 1") -@UDFType(deterministic = false) +@UDFType(deterministic = false, stateful = true) public class UDFRowSequence extends UDF { private LongWritable result = new LongWritable(); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1071012) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -1005,6 +1005,11 @@ * Returns whether a GenericUDF is deterministic or not. */ public static boolean isDeterministic(GenericUDF genericUDF) { + if (isStateful(genericUDF)) { + // stateful implies non-deterministic, regardless of whatever + // the deterministic annotation declares + return false; + } UDFType genericUDFType = genericUDF.getClass().getAnnotation(UDFType.class); if (genericUDFType != null && genericUDFType.deterministic() == false) { return false; @@ -1022,6 +1027,26 @@ } /** + * Returns whether a GenericUDF is stateful or not. + */ + public static boolean isStateful(GenericUDF genericUDF) { + UDFType genericUDFType = genericUDF.getClass().getAnnotation(UDFType.class); + if (genericUDFType != null && genericUDFType.stateful()) { + return true; + } + + if (genericUDF instanceof GenericUDFBridge) { + GenericUDFBridge bridge = (GenericUDFBridge) genericUDF; + UDFType bridgeUDFType = bridge.getUdfClass().getAnnotation(UDFType.class); + if (bridgeUDFType != null && bridgeUDFType.stateful()) { + return true; + } + } + + return false; + } + + /** * Returns whether the exprNodeDesc is a node of "and", "or", "not". */ public static boolean isOpAndOrNot(ExprNodeDesc desc) { Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (revision 1071012) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (working copy) @@ -630,6 +630,12 @@ if (fi.getGenericUDTF() != null) { throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); } + if (!ctx.getAllowStatefulFunctions() && (fi.getGenericUDF() != null)) { + if (FunctionRegistry.isStateful(fi.getGenericUDF())) { + throw new SemanticException( + ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); + } + } desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), children); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (revision 1071012) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (working copy) @@ -131,6 +131,7 @@ UDTF_LATERAL_VIEW("UDTF's cannot be in a select expression when there is a lateral view"), UDTF_ALIAS_MISMATCH("The number of aliases supplied in the AS clause does not match the " + "number of columns output by the UDTF"), + UDF_STATEFUL_INVALID_LOCATION("Stateful UDF's can only be invoked in the SELECT list"), LATERAL_VIEW_WITH_JOIN("Join with a lateral view is not supported"), LATERAL_VIEW_INVALID_CHILD("Lateral view AST with invalid child"), OUTPUT_SPECIFIED_MULTIPLE_TIMES("The same output cannot be present multiple times: "), Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1071012) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -2041,7 +2041,10 @@ qb.getAliases()); } else { // Case when this is an expression - ExprNodeDesc exp = genExprNodeDesc(expr, inputRR); + TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); + // We allow stateful functions in the SELECT list (but nowhere else) + tcCtx.setAllowStatefulFunctions(true); + ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx); col_list.add(exp); if (!StringUtils.isEmpty(alias) && (out_rwsch.get(null, colAlias) != null)) { @@ -6710,9 +6713,31 @@ * @return exprNodeDesc * @throws SemanticException */ - @SuppressWarnings("nls") public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) throws SemanticException { + // Since the user didn't supply a customized type-checking context, + // use default settings. + TypeCheckCtx tcCtx = new TypeCheckCtx(input); + return genExprNodeDesc(expr, input, tcCtx); + } + + /** + * Generates an expression node descriptor for the expression passed in the + * arguments. This function uses the row resolver and the metadata information + * that are passed as arguments to resolve the column names to internal names. + * + * @param expr + * The expression + * @param input + * The row resolver + * @param tcCtx + * Customized type-checking context + * @return exprNodeDesc + * @throws SemanticException + */ + @SuppressWarnings("nls") + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, + TypeCheckCtx tcCtx) throws SemanticException { // We recursively create the exprNodeDesc. Base cases: when we encounter // a column ref, we convert that into an exprNodeColumnDesc; when we // encounter @@ -6732,8 +6757,7 @@ .getIsVirtualCol()); } - // Create the walker, the rules dispatcher and the context. - TypeCheckCtx tcCtx = new TypeCheckCtx(input); + // Create the walker and the rules dispatcher. tcCtx.setUnparseTranslator(unparseTranslator); HashMap nodeOutputs = Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (revision 1071012) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (working copy) @@ -48,6 +48,11 @@ private ASTNode errorSrcNode; /** + * Whether to allow stateful UDF invocations. + */ + private boolean allowStatefulFunctions; + + /** * Constructor. * * @param inputRR @@ -56,6 +61,7 @@ public TypeCheckCtx(RowResolver inputRR) { setInputRR(inputRR); error = null; + allowStatefulFunctions = false; } /** @@ -89,6 +95,20 @@ } /** + * @param allowStatefulFunctions whether to allow stateful UDF invocations + */ + public void setAllowStatefulFunctions(boolean allowStatefulFunctions) { + this.allowStatefulFunctions = allowStatefulFunctions; + } + + /** + * @return whether to allow stateful UDF invocations + */ + public boolean getAllowStatefulFunctions() { + return allowStatefulFunctions; + } + + /** * @param error * the error to set * Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (revision 1071012) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFType.java (working copy) @@ -15,4 +15,5 @@ @Inherited public @interface UDFType { boolean deterministic() default true; + boolean stateful() default false; }