[IMPALA-5949] test_exchange_small_delay failure: Expected exception: Sender timed out waiting for receiver fragment instance - ASF JIRA

Details

Type: Bug
Status: Resolved
Priority: Blocker
Resolution: Fixed
Affects Version/s: Impala 2.11.0
Fix Version/s: Impala 2.11.0
Component/s: Backend
Labels:
- broken-build

Target Version:

Impala 2.11.0
Epic Color:
ghx-label-1

Description

TestExchangeDelays.test_exchange_small_delay[exec_option: {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 'abort_on_error': 1, 'exec_single_node_rows_threshold': 0} | table_format: text/none] 

self = <test_exchange_delays.TestExchangeDelays object at 0x5989d10>
vector = <tests.common.test_vector.ImpalaTestVector object at 0x56d4d10>

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args("--stress_datastream_recvr_delay_ms=10000"
          " --datastream_sender_timeout_ms=5000")
    def test_exchange_small_delay(self, vector):
      """Test delays in registering data stream receivers where the first one or two
        batches will time out before the receiver registers, but subsequent batches will
        arrive after the receiver registers. Before IMPALA-2987, this scenario resulted in
        incorrect results.
        """
>     self.run_test_case('QueryTest/exchange-delays', vector)

custom_cluster/test_exchange_delays.py:39: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <test_exchange_delays.TestExchangeDelays object at 0x5989d10>
test_file_name = 'QueryTest/exchange-delays'
vector = <tests.common.test_vector.ImpalaTestVector object at 0x56d4d10>
use_db = None, multiple_impalad = False, encoding = None, test_file_vars = None

    def run_test_case(self, test_file_name, vector, use_db=None, multiple_impalad=False,
        encoding=None, test_file_vars=None):
      """
        Runs the queries in the specified test based on the vector values
    
        Runs the query targeting the file format/compression specified in the test
        vector and the exec options specified in the test vector. If multiple_impalad=True
        a connection to a random impalad will be chosen to execute each test section.
        Otherwise, the default impalad client will be used.
        Additionally, the encoding for all test data can be specified using the 'encoding'
        parameter. This is useful when data is ingested in a different encoding (ex.
        latin). If not set, the default system encoding will be used.
        If a dict 'test_file_vars' is provided, then all keys will be replaced with their
        values in queries before they are executed. Callers need to avoid using reserved key
        names, see 'reserved_keywords' below.
        """
      table_format_info = vector.get_value('table_format')
      exec_options = vector.get_value('exec_option')
    
      # Resolve the current user's primary group name.
      group_id = pwd.getpwnam(getuser()).pw_gid
      group_name = grp.getgrgid(group_id).gr_name
    
      target_impalad_clients = list()
      if multiple_impalad:
        target_impalad_clients =\
            map(ImpalaTestSuite.create_impala_client, IMPALAD_HOST_PORT_LIST)
      else:
        target_impalad_clients = [self.client]
    
      # Change the database to reflect the file_format, compression codec etc, or the
      # user specified database for all targeted impalad.
      for impalad_client in target_impalad_clients:
        ImpalaTestSuite.change_database(impalad_client,
            table_format_info, use_db, pytest.config.option.scale_factor)
        impalad_client.set_configuration(exec_options)
    
      sections = self.load_query_test_file(self.get_workload(), test_file_name,
          encoding=encoding)
      for test_section in sections:
        if 'SHELL' in test_section:
          assert len(test_section) == 1, \
            "SHELL test sections can't contain other sections"
          cmd = test_section['SHELL']\
            .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX)\
            .replace('$IMPALA_HOME', IMPALA_HOME)
          if use_db: cmd = cmd.replace('$DATABASE', use_db)
          LOG.info("Shell command: " + cmd)
          check_call(cmd, shell=True)
          continue
    
        if 'QUERY' not in test_section:
          assert 0, 'Error in test file %s. Test cases require a -- QUERY section.\n%s' %\
              (test_file_name, pprint.pformat(test_section))
    
        if 'SETUP' in test_section:
          self.execute_test_case_setup(test_section['SETUP'], table_format_info)
    
        # TODO: support running query tests against different scale factors
        query = QueryTestSectionReader.build_query(test_section['QUERY']
            .replace('$GROUP_NAME', group_name)
            .replace('$IMPALA_HOME', IMPALA_HOME)
            .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX)
            .replace('$SECONDARY_FILESYSTEM', os.getenv("SECONDARY_FILESYSTEM") or str()))
        if use_db: query = query.replace('$DATABASE', use_db)
    
        reserved_keywords = ["$DATABASE", "$FILESYSTEM_PREFIX", "$GROUP_NAME",
                             "$IMPALA_HOME", "$NAMENODE", "$QUERY", "$SECONDARY_FILESYSTEM"]
    
        if test_file_vars:
          for key, value in test_file_vars.iteritems():
            if key in reserved_keywords:
              raise RuntimeError("Key {0} is reserved".format(key))
            query = query.replace(key, value)
    
        if 'QUERY_NAME' in test_section:
          LOG.info('Query Name: \n%s\n' % test_section['QUERY_NAME'])
    
        # Support running multiple queries within the same test section, only verifying the
        # result of the final query. The main use case is to allow for 'USE database'
        # statements before a query executes, but it is not limited to that.
        # TODO: consider supporting result verification of all queries in the future
        result = None
        target_impalad_client = choice(target_impalad_clients)
        query_options_changed = []
        try:
          user = None
          if 'USER' in test_section:
            # Create a new client so the session will use the new username.
            user = test_section['USER'].strip()
            target_impalad_client = self.create_impala_client()
          for query in query.split(';'):
            set_pattern_match = SET_PATTERN.match(query)
            if set_pattern_match != None:
              query_options_changed.append(set_pattern_match.groups()[0])
            result = self.__execute_query(target_impalad_client, query, user=user)
        except Exception as e:
          if 'CATCH' in test_section:
            self.__verify_exceptions(test_section['CATCH'], str(e), use_db)
            continue
          raise
        finally:
          if len(query_options_changed) > 0:
            self.__restore_query_options(query_options_changed, target_impalad_client)
    
        if 'CATCH' in test_section and '__NO_ERROR__' not in test_section['CATCH']:
          expected_str = " or ".join(test_section['CATCH']).strip() \
            .replace('$FILESYSTEM_PREFIX', FILESYSTEM_PREFIX) \
            .replace('$NAMENODE', NAMENODE) \
            .replace('$IMPALA_HOME', IMPALA_HOME)
>         assert False, "Expected exception: %s" % expected_str
E         AssertionError: Expected exception: Sender timed out waiting for receiver fragment instance

common/impala_test_suite.py:406: AssertionError
---------------------------- Captured stdout setup -----------------------------
Starting State Store logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/statestored.INFO
Starting Catalog Service logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/catalogd.INFO
Starting Impala Daemon logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/impalad.INFO
Starting Impala Daemon logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/impalad_node1.INFO
Starting Impala Daemon logging to /data/jenkins/workspace/impala-umbrella-build-and-test-isilon/repos/Impala/logs/custom_cluster_tests/impalad_node2.INFO
Waiting for Catalog... Status: 50 DBs / 1077 tables (ready=True)
Waiting for Catalog... Status: 50 DBs / 1077 tables (ready=True)
Waiting for Catalog... Status: 50 DBs / 1077 tables (ready=True)
Impala Cluster Running with 3 nodes and 3 coordinators.
---------------------------- Captured stderr setup -----------------------------
MainThread: Found 3 impalad/1 statestored/1 catalogd process(es)
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
MainThread: Debug webpage not yet available.
MainThread: Waiting for num_known_live_backends=3. Current value: 1
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
MainThread: Waiting for num_known_live_backends=3. Current value: 2
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
MainThread: Waiting for num_known_live_backends=3. Current value: 2
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
MainThread: num_known_live_backends has reached value: 3
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25001
MainThread: num_known_live_backends has reached value: 3
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25002
MainThread: num_known_live_backends has reached value: 3
MainThread: Found 3 impalad/1 statestored/1 catalogd process(es)
MainThread: Getting metric: statestore.live-backends from impala-boost-static-burst-slave-1364.test:25010
MainThread: Metric 'statestore.live-backends' has reach desired value: 4
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25000
MainThread: num_known_live_backends has reached value: 3
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25001
MainThread: num_known_live_backends has reached value: 3
MainThread: Getting num_known_live_backends from impala-boost-static-burst-slave-1364.test:25002
MainThread: num_known_live_backends has reached value: 3
-- connecting to: localhost:21000
----------------------------- Captured stderr call -----------------------------
-- executing against localhost:21000
use functional;

SET batch_size=0;
SET num_nodes=0;
SET disable_codegen_rows_threshold=0;
SET disable_codegen=False;
SET abort_on_error=1;
SET exec_single_node_rows_threshold=0;
-- executing against localhost:21000
select count(*)
from tpch.lineitem
  inner join tpch.orders on l_orderkey = o_orderkey;

-- executing against localhost:21000
select l_orderkey, count(*)
from tpch.lineitem
where l_linenumber = -1
group by l_orderkey;

Attachments

Issue Links

Add Link

is broken by

IMPALA-5199 Impala may hang on empty row batch exchange

Resolved

Delete this link

Activity

Comment

This comment will be viewable by all users.

Cancel

test_exchange_small_delay failure: Expected exception: Sender timed out waiting for receiver fragment instance

Details

Description

Attachments

Attachments

Issue Links

Activity

People

Dates

Agile

Slack

Issue deployment