Uploaded image for project: 'Cassandra'
  1. Cassandra
  2. CASSANDRA-16220

python dtest pending_range_test.py::TestPendingRangeMovements::test_pending_range (@pytest.mark.resource_intensive) fails on trunk

Agile BoardAttach filesAttach ScreenshotVotersWatch issueWatchersCreate sub-taskConvert to sub-taskMoveLinkCloneLabelsUpdate Comment AuthorReplace String in CommentUpdate Comment VisibilityDelete Comments
    XMLWordPrintableJSON

    Details

      Description

      The test has the following section

      if cluster.version() >= '2.2':
        node2.watch_log_for('127.0.0.1 state moving', timeout=10, filename='debug.log’)
      

      The issue is that in trunk we have the port attached to the log, so the log is now

      DEBUG [GossipStage:1] 2020-10-21 00:48:20,104 StorageService.java:2452 - Node /127.0.0.1:7000 state MOVING, tokens [-9223372036854775808]
      DEBUG [GossipStage:1] 2020-10-21 00:48:20,105 StorageService.java:2670 - Node /127.0.0.1:7000 state moving, new token -634023222112864484
      

      Since the log now contains the port, this test always times out on trunk when it hits this

      self = <pending_range_test.TestPendingRangeMovements object at 0x7fc5d35c85f8>
           @pytest.mark.resource_intensive
          def test_pending_range(self):
              """
                  @jira_ticket CASSANDRA-10887
                  """
              cluster = self.cluster
              # If we are on 2.1, we need to set the log level to debug or higher, as debug.log does not exist.
              if cluster.version() < '2.2':
                  cluster.set_log_level('DEBUG')
         
              # Create 5 node cluster
              cluster.populate(5).start()
              node1, node2 = cluster.nodelist()[0:2]
         
              # Set up RF=3 keyspace
              session = self.patient_cql_connection(node1)
              create_ks(session, 'ks', 3)
         
              session.execute("CREATE TABLE users (login text PRIMARY KEY, email text, name text, login_count int)")
         
              # We use the partition key 'jdoe3' because it belongs to node1.
              # The key MUST belong to node1 to repro the bug.
              session.execute("INSERT INTO users (login, email, name, login_count) VALUES ('jdoe3', 'jdoe@abc.com', 'Jane Doe', 1) IF NOT EXISTS;")
         
              lwt_query = SimpleStatement("UPDATE users SET email = 'janedoe@abc.com' WHERE login = 'jdoe3' IF email = 'jdoe@abc.com'")
         
              # Show we can execute LWT no problem
              for i in range(1000):
                  session.execute(lwt_query)
         
              token = '-634023222112864484'
         
              mark = node1.mark_log()
         
              # Move a node
              node1.nodetool('move {}'.format(token))
         
              # Watch the log so we know when the node is moving
              node1.watch_log_for('Moving .* to {}'.format(token), timeout=10, from_mark=mark)
              node1.watch_log_for('Sleeping 30000 ms before start streaming/fetching ranges', timeout=10, from_mark=mark)
         
              if cluster.version() >= '2.2':
      >           node2.watch_log_for('127.0.0.1 state moving', timeout=10, filename='debug.log')
       pending_range_test.py:57: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
       self = <ccmlib.node.Node object at 0x7fc5d3b51208>
      exprs = '127.0.0.1 state moving', from_mark = None, timeout = 10, process = None
      verbose = False, filename = 'debug.log'
           def watch_log_for(self, exprs, from_mark=None, timeout=600, process=None, verbose=False, filename='system.log'):
              """
                  Watch the log until one or more (regular) expressions are found or timeouts (a
                  TimeoutError is then raised). On successful completion, a list of pair (line matched,
                  match object) is returned.
                  """
              start = time.time()
              tofind = [exprs] if isinstance(exprs, string_types) else exprs
              tofind = [re.compile(e) for e in tofind]
              matchings = []
              reads = ""
              if len(tofind) == 0:
                  return None
         
              log_file = os.path.join(self.log_directory(), filename)
              output_read = False
              while not os.path.exists(log_file):
                  time.sleep(.5)
                  if start + timeout < time.time():
                      raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Timed out waiting for {} to be created.".format(log_file))
                  if process and not output_read:
                      process.poll()
                      if process.returncode is not None:
                          self.print_process_output(self.name, process, verbose)
                          output_read = True
                          if process.returncode != 0:
                              raise RuntimeError()  # Shouldn't reuse RuntimeError but I'm lazy
         
              with open(log_file) as f:
                  if from_mark:
                      f.seek(from_mark)
         
                  while True:
                      # First, if we have a process to check, then check it.
                      # Skip on Windows - stdout/stderr is cassandra.bat
                      if not common.is_win() and not output_read:
                          if process:
                              process.poll()
                              if process.returncode is not None:
                                  self.print_process_output(self.name, process, verbose)
                                  output_read = True
                                  if process.returncode != 0:
                                      raise RuntimeError()  # Shouldn't reuse RuntimeError but I'm lazy
         
                      line = f.readline()
                      if line:
                          reads = reads + line
                          for e in tofind:
                              m = e.search(line)
                              if m:
                                  matchings.append((line, m))
                                  tofind.remove(e)
                                  if len(tofind) == 0:
                                      return matchings[0] if isinstance(exprs, string_types) else matchings
                      else:
                          # yep, it's ugly
                          time.sleep(1)
                          if start + timeout < time.time():
      >                       raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Missing: " + str([e.pattern for e in tofind]) + ":\n" + reads[:50] + ".....\nSee {} for remainder".format(filename))
      E                       ccmlib.node.TimeoutError: 21 Oct 2020 00:50:26 [node2] Missing: ['127.0.0.1 state moving']:
      E                       INFO  [main] 2020-10-21 00:49:15,878 YamlConfigura.....
      E                       See debug.log for remainder
       /opt/rh/rh-python36/root/usr/lib/python3.6/site-packages/ccmlib/node.py:550: TimeoutError
      

        Attachments

        Issue Links

          Activity

            People

            • Assignee:
              e.dimitrova Ekaterina Dimitrova Assign to me
              Reporter:
              dcapwell David Capwell
              Authors:
              Ekaterina Dimitrova
              Reviewers:
              Berenguer Blasi, Brandon Williams

              Dates

              • Created:
                Updated:
                Resolved:

                Issue deployment