Details
-
Bug
-
Status: Resolved
-
P2
-
Resolution: Fixed
-
None
Description
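The `test_flight_delays` integration test (`apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest`) in the `beam_PostCommit_Python36` suite has been perma-red (failing on every run) for the last week. The Dataflow job ends in state FAILED because BigQuery returns HTTP 403 "Access Denied": the user running the job does not have permission to query the table `bigquery-samples:airline_ontime_data.flights`. Example failing runs and the full stack trace follow.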
https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest testMethod=test_flight_delays> @pytest.mark.examples_postcommit @pytest.mark.it_postcommit def test_flight_delays(self): flight_delays.run_flight_delay_pipeline( self.test_pipeline, start_date='2012-12-23', end_date='2012-12-25', > output=self.output_path) apache_beam/examples/dataframe/flight_delays_it_test.py:110: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ apache_beam/examples/dataframe/flight_delays.py:105: in run_flight_delay_pipeline result.to_csv(output) apache_beam/pipeline.py:596: in __exit__ self.result = self.run() apache_beam/testing/test_pipeline.py:114: in run False if self.not_use_test_runner_api else test_runner_api)) apache_beam/pipeline.py:549: in run self._options).run(False) apache_beam/pipeline.py:573: in run return self.runner.run_pipeline(self, self._options) apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline self.result.wait_until_finish(duration=wait_duration) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = <DataflowPipelineResult <Job clientRequestId: '20220420090044988852-7153' createTime: '2022-04-20T09:00:55.136265Z' ...022-04-20T09:00:55.136265Z' steps: [] tempFiles: [] type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80> duration = None def wait_until_finish(self, duration=None): if not self.is_in_terminal_state(): if not self.has_job: raise IOError('Failed to get the Dataflow job id.') thread = threading.Thread( target=DataflowRunner.poll_for_job_completion, args=(self._runner, self, duration)) # Mark the thread as a daemon thread so a keyboard interrupt on the main # thread will terminate everything. This is also the reason we will not # use thread.join() to wait for the polling thread. 
thread.daemon = True thread.start() while thread.is_alive(): time.sleep(5.0) # TODO: Merge the termination code in poll_for_job_completion and # is_in_terminal_state. terminated = self.is_in_terminal_state() assert duration or terminated, ( 'Job did not reach to a terminal state after waiting indefinitely.') # TODO(BEAM-14291): Also run this check if wait_until_finish was called # after the pipeline completed. if terminated and self.state != PipelineState.DONE: # TODO(BEAM-1290): Consider converting this to an error log based on # theresolution of the issue. raise DataflowRuntimeException( 'Dataflow pipeline failed. State: %s, Error:\n%s' % (self.state, getattr(self._runner, 'last_error_msg', None)), > self) E apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error: E Traceback (most recent call last): E File "/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line 646, in do_work E work_executor.execute() E File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 255, in execute E self._split_task) E File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 263, in _perform_source_split_considering_api_limits E desired_bundle_size) E File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 300, in _perform_source_split E for split in source.split(desired_bundle_size): E File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 810, in split E self._setup_temporary_dataset(bq) E File "/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py", line 193, in _f E return fnc(self, *args, **kwargs) E File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 849, in _setup_temporary_dataset E self._get_project(), self.query.get(), self.use_legacy_sql) E File "/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line 253, in wrapper E 
return fun(*args, **kwargs) E File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py", line 416, in get_query_location E response = self.client.jobs.Insert(request) E File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py", line 345, in Insert E upload=upload, upload_config=upload_config) E File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 731, in _RunMethod E return self.ProcessHttpResponse(method_config, http_response, request) E File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 737, in ProcessHttpResponse E self.__ProcessHttpResponse(method_config, http_response, request)) E File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 604, in __ProcessHttpResponse E http_response, method_config=method_config, request=request) E apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing <https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>: response: <{'vary': 'Origin, X-Origin, Referer', 'content-type': 'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT', 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0', 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528', '-content-encoding': 'gzip'}>, content <{ E "error": { E "code": 403, E "message": "Access Denied: Table bigquery-samples:airline_ontime_data.flights: User does not have permission to query table bigquery-samples:airline_ontime_data.flights.", E "errors": [ E { E "message": "Access Denied: Table bigquery-samples:airline_ontime_data.flights: User does not have permission to query table bigquery-samples:airline_ontime_data.flights.", E "domain": "global", E "reason": "accessDenied" E } E ], E "status": "PERMISSION_DENIED" E } E } E > 
apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException