Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-5949

test_exchange_small_delay failure: Expected exception: Sender timed out waiting for receiver fragment instance

Agile Board | Attach files | Attach Screenshot | Voters | Watch issue | Watchers | Create sub-task | Link | Clone | Update Comment Author | Replace String in Comment | Update Comment Visibility | Delete Comments
    XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: Impala 2.11.0
    • Fix Version/s: Impala 2.11.0
    • Component/s: Backend

    Description

      TestExchangeDelays.test_exchange_small_delay[exec_option: {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 'abort_on_error': 1, 'exec_single_node_rows_threshold': 0} | table_format: text/none] 
      
      self = <test_exchange_delays.TestExchangeDelays object at 0x5989d10>
      vector = <tests.common.test_vector.ImpalaTestVector object at 0x56d4d10>
      
          @pytest.mark.execute_serially
          @CustomClusterTestSuite.with_args("--stress_datastream_recvr_delay_ms=10000"
                " --datastream_sender_timeout_ms=5000")
          def test_exchange_small_delay(self, vector):
            """Test delays in registering data stream receivers where the first one or two
              batches will time out before the receiver registers, but subsequent batches will
              arrive after the receiver registers. Before IMPALA-2987, this scenario resulted in
              incorrect results.
              """
      >     self.run_test_case('QueryTest/exchange-delays', vector)
      
      custom_cluster/test_exchange_delays.py:39: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      self = <test_exchange_delays.TestExchangeDelays object at 0x5989d10>
      test_file_name = 'QueryTest/exchange-delays'
      vector = <tests.common.test_vector.ImpalaTestVector object at 0x56d4d10>
      use_db = None, multiple_impalad = False, encoding = None, test_file_vars = None
      
          def run_test_case(self, test_file_name, vector, use_db=None, multiple_impalad=False,
              encoding=None, test_file_vars=None):
            """
              Runs the queries in the specified test based on the vector values
          
              Runs the query using targeting the file format/compression specified in the test
              vector and the exec options specified in the test vector. If multiple_impalad=True
              a connection to a random impalad will be chosen to execute each test section.
              Otherwise, the default impalad client will be used.
              Additionally, the encoding for all test data can be specified using the 'encoding'
              parameter. This is useful when data is ingested in a different encoding (ex.
              latin). If not set, the default system encoding will be used.
              If a dict 'test_file_vars' is provided, then all keys will be replaced with their
              values in queries before they are executed. Callers need to avoid using reserved key
              names, see 'reserved_keywords' below.
              """
            table_format_info = vector.get_value('table_format')
            exec_options = vector.get_value('exec_option')
          
            # Resolve the current user's primary group name.
            group_id = pwd.getpwnam(getuser()).pw_gid
            group_name = grp.getgrgid(group_id).gr_name
          
            target_impalad_clients = list()
            if multiple_impalad:
              target_impalad_clients =\
                  map(ImpalaTestSuite.create_impala_client, IMPALAD_HOST_PORT_LIST)
            else:
              target_impalad_clients = [self.client]
          
            # Change the database to reflect the file_format, compression codec etc, or the
            # user specified database for all targeted impalad.
            for impalad_client in target_impalad_clients:
              ImpalaTestSuite.change_database(impalad_client,
                  table_format_info, use_db, pytest.config.option.scale_factor)
              impalad_client.set_configuration(exec_options)
          
            sections = self.load_query_test_file(self.get_workload(), test_file_name,
                encoding=encoding)
            for test_section in sections:
              if 'SHELL' in test_section:
                assert len(test_section) == 1, \
                  "SHELL test sections can't contain other sections"
                cmd = test_section['SHELL']\
                  .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX)\
                  .replace('$IMPALA_HOME', IMPALA_HOME)
                if use_db: cmd = cmd.replace('$DATABASE', use_db)
                LOG.info("Shell command: " + cmd)
                check_call(cmd, shell=True)
                continue
          
              if 'QUERY' not in test_section:
                assert 0, 'Error in test file %s. Test cases require a -- QUERY section.\n%s' %\
                    (test_file_name, pprint.pformat(test_section))
          
              if 'SETUP' in test_section:
                self.execute_test_case_setup(test_section['SETUP'], table_format_info)
          
              # TODO: support running query tests against different scale factors
              query = QueryTestSectionReader.build_query(test_section['QUERY']
                  .replace('$GROUP_NAME', group_name)
                  .replace('$IMPALA_HOME', IMPALA_HOME)
                  .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX)
                  .replace('$SECONDARY_FILESYSTEM', os.getenv("SECONDARY_FILESYSTEM") or str()))
              if use_db: query = query.replace('$DATABASE', use_db)
          
              reserved_keywords = ["$DATABASE", "$FILESYSTEM_PREFIX", "$GROUP_NAME",
                                   "$IMPALA_HOME", "$NAMENODE", "$QUERY", "$SECONDARY_FILESYSTEM"]
          
              if test_file_vars:
                for key, value in test_file_vars.iteritems():
                  if key in reserved_keywords:
                    raise RuntimeError("Key {0} is reserved".format(key))
                  query = query.replace(key, value)
          
              if 'QUERY_NAME' in test_section:
                LOG.info('Query Name: \n%s\n' % test_section['QUERY_NAME'])
          
              # Support running multiple queries within the same test section, only verifying the
              # result of the final query. The main use case is to allow for 'USE database'
              # statements before a query executes, but it is not limited to that.
              # TODO: consider supporting result verification of all queries in the future
              result = None
              target_impalad_client = choice(target_impalad_clients)
              query_options_changed = []
              try:
                user = None
                if 'USER' in test_section:
                  # Create a new client so the session will use the new username.
                  user = test_section['USER'].strip()
                  target_impalad_client = self.create_impala_client()
                for query in query.split(';'):
                  set_pattern_match = SET_PATTERN.match(query)
                  if set_pattern_match != None:
                    query_options_changed.append(set_pattern_match.groups()[0])
                  result = self.__execute_query(target_impalad_client, query, user=user)
              except Exception as e:
                if 'CATCH' in test_section:
                  self.__verify_exceptions(test_section['CATCH'], str(e), use_db)
                  continue
                raise
              finally:
                if len(query_options_changed) > 0:
                  self.__restore_query_options(query_options_changed, target_impalad_client)
          
              if 'CATCH' in test_section and '__NO_ERROR__' not in test_section['CATCH']:
                expected_str = " or ".join(test_section['CATCH']).strip() \
                  .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX) \
                  .replace('$NAMENODE', NAMENODE) \
                  .replace('$IMPALA_HOME', IMPALA_HOME)
      >         assert False, "Expected exception: %s" % expected_str
      E         AssertionError: Expected exception: Sender timed out waiting for receiver fragment instance
      
      common/impala_test_suite.py:406: AssertionError
      ---------------------------- Captured stdout setup -----------------------------
      Starting State Store logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/statestored.INFO
      Starting Catalog Service logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/catalogd.INFO
      Starting Impala Daemon logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/impalad.INFO
      Starting Impala Daemon logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/impalad_node1.INFO
      Starting Impala Daemon logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/impalad_node2.INFO
      Waiting for Catalog... Status: 50 DBs / 1077 tables (ready=True)
      Waiting for Catalog... Status: 50 DBs / 1077 tables (ready=True)
      Waiting for Catalog... Status: 50 DBs / 1077 tables (ready=True)
      Impala Cluster Running with 3 nodes and 3 coordinators.
      ---------------------------- Captured stderr setup -----------------------------
      MainThread: Found 3 impalad/1 statestored/1 catalogd process(es)
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
      MainThread: Debug webpage not yet available.
      MainThread: Waiting for num_known_live_backends=3. Current value: 1
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
      MainThread: Waiting for num_known_live_backends=3. Current value: 2
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
      MainThread: Waiting for num_known_live_backends=3. Current value: 2
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
      MainThread: num_known_live_backends has reached value: 3
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25001
      MainThread: num_known_live_backends has reached value: 3
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25002
      MainThread: num_known_live_backends has reached value: 3
      MainThread: Found 3 impalad/1 statestored/1 catalogd process(es)
      MainThread: Getting metric: statestore.live-backends from impala-boost-static-burst-slave-1364.test:25010
      MainThread: Metric 'statestore.live-backends' has reach desired value: 4
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
      MainThread: num_known_live_backends has reached value: 3
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25001
      MainThread: num_known_live_backends has reached value: 3
      MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25002
      MainThread: num_known_live_backends has reached value: 3
      -- connecting to: localhost:21000
      ----------------------------- Captured stderr call -----------------------------
      -- executing against localhost:21000
      use functional;
      
      SET batch_size=0;
      SET num_nodes=0;
      SET disable_codegen_rows_threshold=0;
      SET disable_codegen=False;
      SET abort_on_error=1;
      SET exec_single_node_rows_threshold=0;
      -- executing against localhost:21000
      select count(*)
      from tpch.lineitem
        inner join tpch.orders on l_orderkey = o_orderkey;
      
      -- executing against localhost:21000
      select l_orderkey, count(*)
      from tpch.lineitem
      where l_linenumber = -1
      group by l_orderkey;

      Attachments

        Issue Links

        Activity

          This comment will be viewable by all users.
          Cancel

          People

            tarmstrong Tim Armstrong
            mmulder Matthew Mulder
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Slack

                Issue deployment