Details
Type: Improvement
Status: Resolved
Priority: Major
Resolution: Fixed
Fix Version/s: 4.0.0
Description

sf.lit currently rejects one-dimensional NumPy arrays with a string dtype, because _from_numpy_type has no mapping for NumPy string types. The traceback below shows the failure:
In [5]: spark.range(1).select(sf.lit(np.array(["a", "b"], np.str_))).schema
---------------------------------------------------------------------------
PySparkTypeError Traceback (most recent call last)
Cell In[5], line 1
----> 1 spark.range(1).select(sf.lit(np.array(["a", "b"], np.str_))).schema
File ~/Dev/spark/python/pyspark/sql/utils.py:272, in try_remote_functions.<locals>.wrapped(*args, **kwargs)
269 if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ:
270 from pyspark.sql.connect import functions
--> 272 return getattr(functions, f.__name__)(*args, **kwargs)
273 else:
274 return f(*args, **kwargs)
File ~/Dev/spark/python/pyspark/sql/connect/functions/builtin.py:271, in lit(col)
269 elif isinstance(col, np.ndarray) and col.ndim == 1:
270 if _from_numpy_type(col.dtype) is None:
--> 271 raise PySparkTypeError(
272 errorClass="UNSUPPORTED_NUMPY_ARRAY_SCALAR",
273 messageParameters={"dtype": col.dtype.name},
274 )
276 # NumpyArrayConverter for Py4J can not support ndarray with int8 values.
277 # Actually this is not a problem for Connect, but here still convert it
278 # to int16 for compatibility.
279 if col.dtype == np.int8:
PySparkTypeError: [UNSUPPORTED_NUMPY_ARRAY_SCALAR] The type of array scalar 'str32' is not supported.
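Until the dtype mapping covers NumPy string arrays, one possible workaround is to build the array literal from plain Python strings rather than passing the ndarray itself. This is only a sketch (the SparkSession setup and the "letters" alias are illustrative, not part of the report):

import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()

arr = np.array(["a", "b"], np.str_)

# Convert the NumPy string array to plain Python strings and build the
# array literal element by element, bypassing the dtype check that
# rejects 'str32' ndarrays.
letters = sf.array(*[sf.lit(x) for x in arr.tolist()])

spark.range(1).select(letters.alias("letters")).printSchema()

This yields an array<string> column, which is what passing the string ndarray directly to sf.lit would be expected to produce once supported.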