Description
Add a new API for registering row-at-a-time or scalar vectorized UDFs. The registered UDFs can then be used in SQL statements.
>>> from pyspark.sql.types import IntegerType
>>> from pyspark.sql.functions import udf
>>> slen = udf(lambda s: len(s), IntegerType())
>>> _ = spark.udf.registerUDF("slen", slen)
>>> spark.sql("SELECT slen('test')").collect()
[Row(slen(test)=4)]

>>> import random
>>> from pyspark.sql.functions import udf
>>> from pyspark.sql.types import IntegerType
>>> random_udf = udf(lambda: random.randint(0, 100), IntegerType()).asNondeterministic()
>>> newRandom_udf = spark.catalog.registerUDF("random_udf", random_udf)
>>> spark.sql("SELECT random_udf()").collect()
[Row(random_udf()=82)]
>>> spark.range(1).select(newRandom_udf()).collect()
[Row(random_udf()=62)]

>>> from pyspark.sql.functions import pandas_udf, PandasUDFType
>>> @pandas_udf("integer", PandasUDFType.SCALAR)
... def add_one(x):
...     return x + 1
...
>>> _ = spark.udf.registerUDF("add_one", add_one)
>>> spark.sql("SELECT add_one(id) FROM range(10)").collect()