======================================================================
ERROR [68.963s]: test_crossvalidator_with_fold_col (pyspark.ml.tests.connect.test_legacy_mode_tuning.CrossValidatorTests.test_crossvalidator_with_fold_col)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/__w/spark/spark/python/pyspark/ml/tests/connect/test_legacy_mode_tuning.py", line 274, in test_crossvalidator_with_fold_col
cv.fit(train_dataset)
File "/__w/spark/spark/python/pyspark/ml/connect/base.py", line 105, in fit
return self._fit(dataset)
^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/pyspark/ml/connect/tuning.py", line 447, in _fit
bestModel = cast(Model, est.fit(dataset, epm[bestIndex]))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/pyspark/ml/connect/base.py", line 103, in fit
return self.copy(params)._fit(dataset)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/pyspark/ml/connect/classification.py", line 251, in _fit
model_state_dict = distributor._train_on_dataframe(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/pyspark/ml/torch/distributor.py", line 1043, in _train_on_dataframe
return self._run_distributed_training(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/pyspark/ml/torch/distributor.py", line 804, in _run_distributed_training
).collect()
^^^^^^^^^
File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 1369, in collect
sock_info = self._jdf.collectToPython()
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__
return_value = get_return_value(
^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/pyspark/errors/exceptions/captured.py", line 182, in deco
return f(*a, **kw)
^^^^^^^^^^^
File "/__w/spark/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o2582.collectToPython.
: org.apache.spark.SparkException: Job aborted due to stage failure: Could not recover from a failed barrier ResultStage. Most recent failure reason: Stage failed because barrier task ResultTask(89, 0) finished unsuccessfully.
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1535, in main
process()
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1527, in process
serializer.dump_stream(out_iter, outfile)
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 162, in dump_stream
return super(ArrowStreamUDFSerializer, self).dump_stream(wrap_and_init_stream(), stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 101, in dump_stream
for batch in iterator:
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 147, in wrap_and_init_stream
for batch, _ in iterator:
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 1296, in func
for result_batch, result_type in result_iter:
File "/__w/spark/spark/python/pyspark/ml/torch/distributor.py", line 721, in wrapped_train_fn
output = TorchDistributor._get_output_from_framework_wrapper(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/ml/torch/distributor.py", line 567, in _get_output_from_framework_wrapper
return framework_wrapper(
^^^^^^^^^^^^^^^^^^
File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/ml/torch/distributor.py", line 908, in _run_training_on_pytorch_function
raise RuntimeError(
RuntimeError: TorchDistributor failed during training. View stdout logs for detailed error message.
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:517)
    at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:117)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:473)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:601)
    at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:388)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:891)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:891)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
    at org.apache.spark.scheduler.Task.run(Task.scala:141)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:628)
    at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
    at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:96)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:631)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
    at java.base/java.lang.Thread.run(Thread.java:840)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2820)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2817)
    at scala.collection.immutable.List.foreach(List.scala:333)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2817)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:2252)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3081)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3021)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3010)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:990)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2428)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2449)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2468)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2493)
    at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1047)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:408)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:1046)
    at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:448)
    at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.$anonfun$executeCollect$1(AdaptiveSparkPlanExec.scala:380)
    at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.withFinalPlanUpdate(AdaptiveSparkPlanExec.scala:408)
    at org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec.executeCollect(AdaptiveSparkPlanExec.scala:380)
    at org.apache.spark.sql.Dataset.$anonfun$collectToPython$1(Dataset.scala:4246)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4420)
    at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:557)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:4418)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:150)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:241)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:116)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:918)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:72)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:196)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4418)
    at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:4243)
    at jdk.internal.reflect.GeneratedMethodAccessor84.invoke(Unknown Source)
    at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.base/java.lang.reflect.Method.invoke(Method.java:568)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
    at java.base/java.lang.Thread.run(Thread.java:840)
----------------------------------------------------------------------