BEAM-14336

Access Denied: Table bigquery-samples:airline_ontime_data.flights

Details

    Description

      This test has been perma-red for the last week:
      https://ci-beam.apache.org/job/beam_PostCommit_Python36/5189/testReport/junit/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/
      https://ci-beam.apache.org/job/beam_PostCommit_Python36/5215/testReport/apache_beam.examples.dataframe.flight_delays_it_test/FlightDelaysTest/test_flight_delays/

      self = <apache_beam.examples.dataframe.flight_delays_it_test.FlightDelaysTest testMethod=test_flight_delays>
      
          @pytest.mark.examples_postcommit
          @pytest.mark.it_postcommit
          def test_flight_delays(self):
            flight_delays.run_flight_delay_pipeline(
                self.test_pipeline,
                start_date='2012-12-23',
                end_date='2012-12-25',
      >         output=self.output_path)
      
      apache_beam/examples/dataframe/flight_delays_it_test.py:110: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      apache_beam/examples/dataframe/flight_delays.py:105: in run_flight_delay_pipeline
          result.to_csv(output)
      apache_beam/pipeline.py:596: in __exit__
          self.result = self.run()
      apache_beam/testing/test_pipeline.py:114: in run
          False if self.not_use_test_runner_api else test_runner_api))
      apache_beam/pipeline.py:549: in run
          self._options).run(False)
      apache_beam/pipeline.py:573: in run
          return self.runner.run_pipeline(self, self._options)
      apache_beam/runners/dataflow/test_dataflow_runner.py:64: in run_pipeline
          self.result.wait_until_finish(duration=wait_duration)
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = <DataflowPipelineResult <Job
       clientRequestId: '20220420090044988852-7153'
       createTime: '2022-04-20T09:00:55.136265Z'
      ...022-04-20T09:00:55.136265Z'
       steps: []
       tempFiles: []
       type: TypeValueValuesEnum(JOB_TYPE_BATCH, 1)> at 0x7fcf0b6f0e80>
      duration = None
      
          def wait_until_finish(self, duration=None):
            if not self.is_in_terminal_state():
              if not self.has_job:
                raise IOError('Failed to get the Dataflow job id.')
          
              thread = threading.Thread(
                  target=DataflowRunner.poll_for_job_completion,
                  args=(self._runner, self, duration))
          
              # Mark the thread as a daemon thread so a keyboard interrupt on the main
              # thread will terminate everything. This is also the reason we will not
              # use thread.join() to wait for the polling thread.
              thread.daemon = True
              thread.start()
              while thread.is_alive():
                time.sleep(5.0)
          
              # TODO: Merge the termination code in poll_for_job_completion and
              # is_in_terminal_state.
              terminated = self.is_in_terminal_state()
              assert duration or terminated, (
                  'Job did not reach to a terminal state after waiting indefinitely.')
          
              # TODO(BEAM-14291): Also run this check if wait_until_finish was called
              # after the pipeline completed.
              if terminated and self.state != PipelineState.DONE:
                # TODO(BEAM-1290): Consider converting this to an error log based on
                # theresolution of the issue.
                raise DataflowRuntimeException(
                    'Dataflow pipeline failed. State: %s, Error:\n%s' %
                    (self.state, getattr(self._runner, 'last_error_msg', None)),
      >             self)
      E         apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
      E         Traceback (most recent call last):
      E           File "/usr/local/lib/python3.6/site-packages/dataflow_worker/batchworker.py", line 646, in do_work
      E             work_executor.execute()
      E           File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 255, in execute
      E             self._split_task)
      E           File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 263, in _perform_source_split_considering_api_limits
      E             desired_bundle_size)
      E           File "/usr/local/lib/python3.6/site-packages/dataflow_worker/executor.py", line 300, in _perform_source_split
      E             for split in source.split(desired_bundle_size):
      E           File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 810, in split
      E             self._setup_temporary_dataset(bq)
      E           File "/usr/local/lib/python3.6/site-packages/apache_beam/options/value_provider.py", line 193, in _f
      E             return fnc(self, *args, **kwargs)
      E           File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery.py", line 849, in _setup_temporary_dataset
      E             self._get_project(), self.query.get(), self.use_legacy_sql)
      E           File "/usr/local/lib/python3.6/site-packages/apache_beam/utils/retry.py", line 253, in wrapper
      E             return fun(*args, **kwargs)
      E           File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/bigquery_tools.py", line 416, in get_query_location
      E             response = self.client.jobs.Insert(request)
      E           File "/usr/local/lib/python3.6/site-packages/apache_beam/io/gcp/internal/clients/bigquery/bigquery_v2_client.py", line 345, in Insert
      E             upload=upload, upload_config=upload_config)
      E           File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 731, in _RunMethod
      E             return self.ProcessHttpResponse(method_config, http_response, request)
      E           File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 737, in ProcessHttpResponse
      E             self.__ProcessHttpResponse(method_config, http_response, request))
      E           File "/usr/local/lib/python3.6/site-packages/apitools/base/py/base_api.py", line 604, in __ProcessHttpResponse
      E             http_response, method_config=method_config, request=request)
      E         apitools.base.py.exceptions.HttpForbiddenError: HttpError accessing <https://bigquery.googleapis.com/bigquery/v2/projects/apache-beam-testing/jobs?alt=json>: response: <{'vary': 'Origin, X-Origin, Referer', 'content-type': 'application/json; charset=UTF-8', 'date': 'Wed, 20 Apr 2022 09:06:44 GMT', 'server': 'ESF', 'cache-control': 'private', 'x-xss-protection': '0', 'x-frame-options': 'SAMEORIGIN', 'x-content-type-options': 'nosniff', 'transfer-encoding': 'chunked', 'status': '403', 'content-length': '528', '-content-encoding': 'gzip'}>, content <{
      E           "error": {
      E             "code": 403,
      E             "message": "Access Denied: Table bigquery-samples:airline_ontime_data.flights: User does not have permission to query table bigquery-samples:airline_ontime_data.flights.",
      E             "errors": [
      E               {
      E                 "message": "Access Denied: Table bigquery-samples:airline_ontime_data.flights: User does not have permission to query table bigquery-samples:airline_ontime_data.flights.",
      E                 "domain": "global",
      E                 "reason": "accessDenied"
      E               }
      E             ],
      E             "status": "PERMISSION_DENIED"
      E           }
      E         }
      E         >
      
      apache_beam/runners/dataflow/dataflow_runner.py:1661: DataflowRuntimeException
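
      For reference (an addition, not part of the original report), a minimal way to
      reproduce the permission failure outside of Dataflow is a BigQuery dry-run
      query, assuming the google-cloud-bigquery client library and the same
      apache-beam-testing credentials the test runs under:

          # Hypothetical permission check, not from the original report: a dry-run
          # query fails fast with 403 Forbidden when the caller cannot read the table.
          from google.api_core.exceptions import Forbidden
          from google.cloud import bigquery

          client = bigquery.Client(project="apache-beam-testing")
          job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
          try:
              client.query(
                  "SELECT COUNT(*) FROM `bigquery-samples.airline_ontime_data.flights`",
                  job_config=job_config,
              )
              print("Table is queryable with these credentials.")
          except Forbidden as e:
              # Matches the accessDenied error in the traceback above.
              print("Access denied:", e)

      The same check can be run from a shell with the bq CLI (bq query --dry_run ...);
      a 403 there as well would point at revoked access to the bigquery-samples
      dataset rather than a Beam regression.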
      


People

  bhulette Brian Hulette
  apilloud Andrew Pilloud


Time Tracking

  Estimated: Not Specified
  Remaining: 0h
  Logged: 2h 10m
