Description
With the following debugging diff applied (it adds a `_unwrapped` accessor so the underlying `UserDefinedFunction` behind the wrapper can be inspected):
diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py
index de96846c5c7..026a78bf547 100644
--- a/python/pyspark/sql/udf.py
+++ b/python/pyspark/sql/udf.py
@@ -180,6 +180,7 @@ class UserDefinedFunction(object):
         wrapper.deterministic = self.deterministic
         wrapper.asNondeterministic = functools.wraps(
             self.asNondeterministic)(lambda: self.asNondeterministic()._wrapped())
+        wrapper._unwrapped = lambda: self
         return wrapper

     def asNondeterministic(self):
>>> from pyspark.sql.functions import udf
>>> f = udf(lambda x: x)
>>> spark.range(1).select(f("id"))
DataFrame[<lambda>(id): string]
>>> f._unwrapped()._judf_placeholder.udfDeterministic()
True
>>> ndf = f.asNondeterministic()
>>> ndf.deterministic
False
>>> spark.range(1).select(ndf("id"))
DataFrame[<lambda>(id): string]
>>> ndf._unwrapped()._judf_placeholder.udfDeterministic()
True
It seems the determinism flag is never propagated to the JVM once the UDF has been used: calling the UDF materializes and caches the Java UDF (`_judf`) on the Python side, so although `asNondeterministic()` sets the Python-side `deterministic` attribute to `False`, the already-cached `_judf_placeholder` still reports `udfDeterministic()` as `True`, as shown above.