Details
-
Bug
-
Status: Resolved
-
P3
-
Resolution: Won't Fix
-
2.17.0
-
None
Description
We are using Apache Beam version 2.17.0 (Python SDK on Python 3.7) with the Dataflow runner on Google Cloud Platform.
We frequently get an "index out of range" exception (IndexError) in filebasedsource.py, specifically at this line: https://github.com/apache/beam/blob/release-2.17.0/sdks/python/apache_beam/io/filebasedsource.py#L370
The whole stack trace:
Traceback (most recent call last): File "/usr/local/lib/python3.7/site-packages/dataflow_worker/batchworker.py", line 650, in do_work work_executor.execute() File "/usr/local/lib/python3.7/site-packages/dataflow_worker/executor.py", line 176, in execute op.start() File "dataflow_worker/shuffle_operations.py", line 50, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start File "dataflow_worker/shuffle_operations.py", line 51, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start File "dataflow_worker/shuffle_operations.py", line 66, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start File "dataflow_worker/shuffle_operations.py", line 67, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start File "dataflow_worker/shuffle_operations.py", line 71, in dataflow_worker.shuffle_operations.GroupedShuffleReadOperation.start File "apache_beam/runners/worker/operations.py", line 256, in apache_beam.runners.worker.operations.Operation.output File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive File "dataflow_worker/shuffle_operations.py", line 234, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process File "dataflow_worker/shuffle_operations.py", line 241, in dataflow_worker.shuffle_operations.BatchGroupAlsoByWindowsOperation.process File "apache_beam/runners/worker/operations.py", line 256, in apache_beam.runners.worker.operations.Operation.output File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive 
File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process File "apache_beam/runners/common.py", line 834, in apache_beam.runners.common.DoFnRunner._reraise_augmented File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process File "apache_beam/runners/common.py", line 919, in apache_beam.runners.common._OutputProcessor.process_outputs File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive File "apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process File "apache_beam/runners/common.py", line 834, in apache_beam.runners.common.DoFnRunner._reraise_augmented File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process File "apache_beam/runners/common.py", line 919, in apache_beam.runners.common._OutputProcessor.process_outputs File "apache_beam/runners/worker/operations.py", line 143, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive File "apache_beam/runners/worker/operations.py", line 593, in apache_beam.runners.worker.operations.DoOperation.process File "apache_beam/runners/worker/operations.py", line 594, in apache_beam.runners.worker.operations.DoOperation.process File "apache_beam/runners/common.py", line 776, in apache_beam.runners.common.DoFnRunner.receive File 
"apache_beam/runners/common.py", line 782, in apache_beam.runners.common.DoFnRunner.process File "apache_beam/runners/common.py", line 849, in apache_beam.runners.common.DoFnRunner._reraise_augmented File "/usr/local/lib/python3.7/site-packages/future/utils/__init__.py", line 421, in raise_with_traceback raise exc.with_traceback(traceback) File "apache_beam/runners/common.py", line 780, in apache_beam.runners.common.DoFnRunner.process File "apache_beam/runners/common.py", line 440, in apache_beam.runners.common.SimpleInvoker.invoke_process File "apache_beam/runners/common.py", line 895, in apache_beam.runners.common._OutputProcessor.process_outputs File "/usr/local/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py", line 370, in process source = list(source.split(float('inf')))[0].source IndexError: list index out of range [while running 'example/new_data/read/ReadAllFiles/ReadRange']