Description
For instance, see https://github.com/apache/spark/blob/08c76b5d39127ae207d9d1fff99c2551e6ce2581/python/pyspark/sql/types.py#L894-L905
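For reference, the linked lines define a mapping from Python types to Spark SQL types roughly along these lines (paraphrased here; the exact contents may differ slightly in that commit):

_type_mappings = {
    type(None): NullType,
    bool: BooleanType,
    int: LongType,
    float: DoubleType,
    str: StringType,
    bytearray: BinaryType,
    decimal.Decimal: DecimalType,
    datetime.date: DateType,
    datetime.datetime: TimestampType,
    datetime.time: TimestampType,
}

Both type(None) and datetime.time appear in the mapping, which is why inference for them looks intended.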
It looks like we intended to support datetime.time and None for type inference as well, but it does not work:
>>> spark.createDataFrame([[datetime.time()]])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/.../spark/python/pyspark/sql/session.py", line 751, in createDataFrame
    rdd, schema = self._createFromLocal(map(prepare, data), schema)
  File "/.../spark/python/pyspark/sql/session.py", line 432, in _createFromLocal
    data = [schema.toInternal(row) for row in data]
  File "/.../spark/python/pyspark/sql/types.py", line 604, in toInternal
    for f, v, c in zip(self.fields, obj, self._needConversion))
  File "/.../spark/python/pyspark/sql/types.py", line 604, in <genexpr>
    for f, v, c in zip(self.fields, obj, self._needConversion))
  File "/.../spark/python/pyspark/sql/types.py", line 442, in toInternal
    return self.dataType.toInternal(obj)
  File "/.../spark/python/pyspark/sql/types.py", line 193, in toInternal
    else time.mktime(dt.timetuple()))
AttributeError: 'datetime.time' object has no attribute 'timetuple'

>>> spark.createDataFrame([[None]])
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/.../spark/python/pyspark/sql/session.py", line 751, in createDataFrame
    rdd, schema = self._createFromLocal(map(prepare, data), schema)
  File "/.../spark/python/pyspark/sql/session.py", line 419, in _createFromLocal
    struct = self._inferSchemaFromList(data, names=schema)
  File "/.../python/pyspark/sql/session.py", line 353, in _inferSchemaFromList
    raise ValueError("Some of types cannot be determined after inferring")
ValueError: Some of types cannot be determined after inferring
It seems we should add a specification of which types are actually supported by type inference.
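As a point of comparison (a minimal sketch, not part of the report; the column name "value" and the StringType choice are arbitrary), supplying an explicit schema avoids the failing inference path for the None case, since no type has to be inferred from the data:

from pyspark.sql.types import StructType, StructField, StringType

# Workaround sketch: declare the column type up front instead of relying on
# inference, so a column containing only None can still be created.
schema = StructType([StructField("value", StringType(), True)])
df = spark.createDataFrame([[None]], schema=schema)
df.show()
# +-----+
# |value|
# +-----+
# | null|
# +-----+

No similar workaround applies to datetime.time, since there is no corresponding Spark SQL type to declare in the schema.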