Description

Running the branch-3.5 PySpark (Spark Connect) tests and doctests fails as shown in the logs below, taken from these GitHub Actions runs:

- https://github.com/HyukjinKwon/spark/actions/runs/8908464265/job/24464135564
- https://github.com/HyukjinKwon/spark/actions/runs/8909131027/job/24465959134
- https://github.com/HyukjinKwon/spark/actions/runs/8918871289/job/24494177776
======================================================================
FAIL [0.169s]: test_checking_csv_header (pyspark.sql.tests.connect.test_parity_datasources.DataSourcesParityTests.test_checking_csv_header)
----------------------------------------------------------------------
pyspark.errors.exceptions.connect.SparkConnectGrpcException: (org.apache.spark.SparkException) [FAILED_READ_FILE.NO_HINT] Encountered error while reading file file:///home/runner/work/spark/spark-3.5/python/target/38acabf5-710b-4c21-b359-f61619e2adc7/tmpm7qyq23g/part-00000-d6c8793b-772d-44e7-bcca-6eeae9cc0ec7-c000.csv. SQLSTATE: KD001

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/test_datasources.py", line 167, in test_checking_csv_header
    self.assertRaisesRegex(
AssertionError: "CSV header does not conform to the schema" does not match "(org.apache.spark.SparkException) [FAILED_READ_FILE.NO_HINT] Encountered error while reading file file:///home/runner/work/spark/spark-3.5/python/target/38acabf5-710b-4c21-b359-f61619e2adc7/tmpm7qyq23g/part-00000-d6c8793b-772d-44e7-bcca-6eeae9cc0ec7-c000.csv. SQLSTATE: KD001"
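For context, a minimal sketch of what this test appears to exercise (the column names and temp path are illustrative, not the exact test body): with `enforceSchema=False`, branch-3.5 expects a header/schema mismatch to surface as "CSV header does not conform to the schema", whereas the server now wraps it in the FAILED_READ_FILE.NO_HINT error above, so the `assertRaisesRegex` no longer matches.

```python
# Illustrative repro sketch, not the exact test body.
# Assumes an active SparkSession `spark` and a writable temp directory.
import tempfile

path = tempfile.mkdtemp()
spark.createDataFrame([(1, 2)], ["f1", "f2"]).write.option("header", True).csv(
    path, mode="overwrite"
)

# With enforceSchema=False the header ("f1", "f2") is validated against the
# schema ("a", "b"); branch-3.5 expects "CSV header does not conform to the
# schema", but the server reports the wrapped FAILED_READ_FILE.NO_HINT error.
spark.read.schema("a INT, b INT").option("header", True).option(
    "enforceSchema", False
).csv(path).collect()
```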
======================================================================
ERROR [0.059s]: test_large_variable_types (pyspark.sql.tests.connect.test_parity_pandas_map.MapInPandasParityTests.test_large_variable_types)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_map.py", line 115, in test_large_variable_types
    actual = df.mapInPandas(func, "str string, bin binary").collect()
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/dataframe.py", line 1645, in collect
    table, schema = self._session.client.to_table(query)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 858, in to_table
    table, schema, _, _, _ = self._execute_and_fetch(req)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1283, in _execute_and_fetch
    for response in self._execute_and_fetch_as_iterator(req):
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1264, in _execute_and_fetch_as_iterator
    self._handle_error(error)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1503, in _handle_error
    self._handle_rpc_error(error)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1539, in _handle_rpc_error
    raise convert_exception(info, status.message) from None
pyspark.errors.exceptions.connect.IllegalArgumentException: [INVALID_PARAMETER_VALUE.CHARSET] The value of parameter(s) `charset` in `encode` is invalid: expects one of the charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', but got utf8. SQLSTATE: 22023
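This one reduces to charset validation in `encode`: the server no longer accepts the lowercase alias `utf8`, only the canonical charset names it lists. A minimal sketch (assumes an active session `spark`; the test reaches `encode` through its `mapInPandas` setup):

```python
from pyspark.sql import functions as sf

df = spark.createDataFrame([("hello",)], ["str"])

# The lowercase alias now fails server-side with
# INVALID_PARAMETER_VALUE.CHARSET ...
df.select(sf.encode(df.str, "utf8")).collect()

# ... while the canonical spelling is accepted.
df.select(sf.encode(df.str, "UTF-8")).collect()
```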
======================================================================
ERROR [0.024s]: test_assert_approx_equal_decimaltype_custom_rtol_pass (pyspark.sql.tests.connect.test_utils.ConnectUtilsTests.test_assert_approx_equal_decimaltype_custom_rtol_pass)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/test_utils.py", line 279, in test_assert_approx_equal_decimaltype_custom_rtol_pass
    assertDataFrameEqual(df1, df2, rtol=1e-1)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/testing/utils.py", line 595, in assertDataFrameEqual
    actual_list = actual.collect()
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/dataframe.py", line 1645, in collect
    table, schema = self._session.client.to_table(query)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 858, in to_table
    table, schema, _, _, _ = self._execute_and_fetch(req)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1283, in _execute_and_fetch
    for response in self._execute_and_fetch_as_iterator(req):
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1264, in _execute_and_fetch_as_iterator
    self._handle_error(error)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1503, in _handle_error
    self._handle_rpc_error(error)
  File "/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py", line 1539, in _handle_rpc_error
    raise convert_exception(info, status.message) from None
pyspark.errors.exceptions.connect.ArithmeticException: [NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION] 83.14 cannot be represented as Decimal(4, 3). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error, and return NULL instead. SQLSTATE: 22003
----------------------------------------------------------------------
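The error itself is an ANSI-mode overflow: Decimal(4, 3) leaves room for only one integral digit, so 83.14 cannot be represented. A sketch of the failing shape, using an explicit cast to hit the same error class server-side (the test itself reaches it through `assertDataFrameEqual`'s `collect()`, with its own data):

```python
from decimal import Decimal

# Illustrative sketch; assumes an active session `spark`.
df = spark.createDataFrame([(Decimal("83.14"),)], ["amount"])

# decimal(4,3) caps values at 9.999; with spark.sql.ansi.enabled=true the
# cast raises NUMERIC_VALUE_OUT_OF_RANGE instead of producing NULL.
df.select(df.amount.cast("decimal(4,3)")).collect()
```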
File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py", line 1057, in pyspark.sql.connect.dataframe.DataFrame.union Failed example: df3.show() Exception raised: Traceback (most recent call last): File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/doctest.py", line 1355, in __run exec(compile(example.source, filename, "single", File "<doctest pyspark.sql.connect.dataframe.DataFrame.union[10]>", line 1, in <module> df3.show() File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py", line 996, in show print(self._show_string(n, truncate, vertical)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py", line 753, in _show_string ).toPandas() ^^^^^^^^^^ File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py", line 1663, in toPandas return self._session.client.to_pandas(query) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 873, in to_pandas table, schema, metrics, observed_metrics, _ = self._execute_and_fetch( ^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1283, in _execute_and_fetch for response in self._execute_and_fetch_as_iterator(req): File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1264, in _execute_and_fetch_as_iterator self._handle_error(error) File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1503, in _handle_error self._handle_rpc_error(error) File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1539, in _handle_rpc_error raise convert_exception(info, status.message) from None pyspark.errors.exceptions.connect.NumberFormatException: [CAST_INVALID_INPUT] The value 'Alice' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22018 JVM stacktrace: org.apache.spark.SparkNumberFormatException: [CAST_INVALID_INPUT] The value 'Alice' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22018 at org.apache.spark.sql.errors.QueryExecutionErrors$.invalidInputInCastToNumberError(QueryExecutionErrors.scala:145) at org.apache.spark.sql.catalyst.util.UTF8StringUtils$.withException(UTF8StringUtils.scala:51) at org.apache.spark.sql.catalyst.util.UTF8StringUtils$.toLongExact(UTF8StringUtils.scala:31) at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castToLong$2(Cast.scala:770) at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castToLong$2$adapted(Cast.scala:770) at org.apache.spark.sql.catalyst.expressions.Cast.buildCast(Cast.scala:565) at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castToLong...
**********************************************************************
File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/functions.py", line 3546, in pyspark.sql.connect.functions.current_database
Failed example:
    spark.range(1).select(current_database()).show()
Expected:
    +------------------+
    |current_database()|
    +------------------+
    |           default|
    +------------------+
Got:
    +----------------+
    |current_schema()|
    +----------------+
    |         default|
    +----------------+
    <BLANKLINE>
**********************************************************************
File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/functions.py", line 3547, in pyspark.sql.connect.functions.current_schema
Failed example:
    spark.range(1).select(sf.current_schema()).show()
Expected:
    +------------------+
    |current_database()|
    +------------------+
    |           default|
    +------------------+
Got:
    +----------------+
    |current_schema()|
    +----------------+
    |         default|
    +----------------+
    <BLANKLINE>
**********************************************************************
File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/functions.py", line 3310, in pyspark.sql.connect.functions.to_unix_timestamp
Failed example:
    df.select(to_unix_timestamp(df.e).alias('r')).collect()
Exception raised:
    Traceback (most recent call last):
      File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/doctest.py", line 1355, in __run
        exec(compile(example.source, filename, "single",
      File "<doctest pyspark.sql.connect.functions.to_unix_timestamp[6]>", line 1, in <module>
        df.select(to_unix_timestamp(df.e).alias('r')).collect()
      File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py", line 1645, in collect
        table, schema = self._session.client.to_table(query)
      File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 858, in to_table
        table, schema, _, _, _ = self._execute_and_fetch(req)
      File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1283, in _execute_and_fetch
        for response in self._execute_and_fetch_as_iterator(req):
      File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1264, in _execute_and_fetch_as_iterator
        self._handle_error(error)
      File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1503, in _handle_error
        self._handle_rpc_error(error)
      File "/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py", line 1539, in _handle_rpc_error
        raise convert_exception(info, status.message) from None
    pyspark.errors.exceptions.connect.DateTimeException: [CANNOT_PARSE_TIMESTAMP] Text '2016-04-08' could not be parsed at index 10. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22007

    JVM stacktrace:
    org.apache.spark.SparkDateTimeException: [CANNOT_PARSE_TIMESTAMP] Text '2016-04-08' could not be parsed at index 10. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22007
        at org.apache.spark.sql.errors.QueryExecutionErrors$.ansiDateTimeParseError(QueryExecutionErrors.scala:271)
        at org.apache.spark.sql.catalyst.expressions.ToTimestamp.eval(datetimeExpressions.scala:1300)
        at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:159)
        at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(InterpretedMutableProjection.scala:89)
        at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$48.$anonfun$applyOrElse$82(Optimizer.scala:2208)
        at scala.collection.immutable.List.map(List.scala:247)
        at scala.collection.immutable.List.map(List.scala:79)
        at org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$48.applyOrElse(Optimizer.scala:2208)
        at org.apache.spark.sql.catalyst.optimizer...
**********************************************************************
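The two `current_database`/`current_schema` doctest failures appear to be plan-rendering differences only (the server now prints the column as `current_schema()`), while the `to_unix_timestamp` failure is another ANSI-mode change: with no explicit format the function parses with "yyyy-MM-dd HH:mm:ss", so a date-only string fails at index 10 instead of returning NULL. A sketch mirroring the doctest (assumes an active session `spark`):

```python
from pyspark.sql import functions as sf

df = spark.createDataFrame([("2016-04-08",)], ["e"])

# Default format is "yyyy-MM-dd HH:mm:ss"; under ANSI a date-only string
# raises CANNOT_PARSE_TIMESTAMP instead of yielding NULL.
df.select(sf.to_unix_timestamp(df.e).alias("r")).collect()

# Supplying the matching format succeeds in either mode.
df.select(sf.to_unix_timestamp(df.e, sf.lit("yyyy-MM-dd")).alias("r")).collect()
```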