Uploaded image for project: 'Cassandra'
  1. Cassandra
  2. CASSANDRA-17891

Fix hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Normal
    • Resolution: Fixed
    • Fix Version/s: 4.1-rc1, 4.1, 4.2
    • Component/s: CI
    • Labels: None

    Description

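      hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window is flaky. It intermittently fails at the final check that the second and third rounds of inserts (keys 100-299) are absent from node2: a row is returned where none is expected.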

       

      self = <hintedhandoff_test.TestHintedHandoff object at 0x7f2d903b8630>
      
          @since('4.1')
          def test_hintedhandoff_window(self):
              """
                  Test that we only store at a maximum the hint window worth of hints.
                  Prior to CASSANDRA-14309 we would store another window worth of hints
                  if the down node was brought up and then taken back down immediately.
                  We would also store another window of hints on a live node if the live
                  node was restarted.
                  @jira_ticket CASSANDRA-14309
                  """
          
              # hint_window_persistent_enabled is set to true by default
              self.cluster.set_configuration_options({'max_hint_window_in_ms': 10000,
                                                      'hinted_handoff_enabled': True,
                                                      'max_hints_delivery_threads': 1,
                                                      'hints_flush_period_in_ms': 100, })
              self.cluster.populate(2).start()
              node1, node2 = self.cluster.nodelist()
              session = self.patient_cql_connection(node1)
              create_ks(session, 'ks', 2)
              create_c1c2_table(self, session)
          
              # Stop handoff until very end and take node2 down for first round of hints
              node1.nodetool('pausehandoff')
          
              node2.nodetool('disablebinary')
              node2.watch_log_for(["Stop listening for CQL clients"], timeout=120)
          
              node2.nodetool('disablegossip')
              node2.watch_log_for(["Announcing shutdown", "state jump to shutdown"], timeout=120)
              node1.watch_log_for(["state jump to shutdown"], timeout=120)
          
              log_mark_node_1 = node1.mark_log()
              log_mark_node_2 = node2.mark_log()
          
              # First round of hints. We expect these to be replayed and the only
              # hints within the window
              insert_c1c2(session, n=(0, 100), consistency=ConsistencyLevel.ONE)
          
              # Let hint window pass
              time.sleep(15)
          
              # Re-enable and disable the node. Prior to CASSANDRA-14215 this should make the hint window on node1 reset.
              node2.nodetool('enablegossip')
              node2.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_2)
              node1.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_1)
          
              log_mark_node_1 = node1.mark_log()
              log_mark_node_2 = node2.mark_log()
          
              node2.nodetool('disablegossip')
          
              node2.watch_log_for(["Announcing shutdown", "state jump to shutdown"], timeout=120, from_mark=log_mark_node_2)
              node1.watch_log_for(["state jump to shutdown"], timeout=120, from_mark=log_mark_node_1)
          
              log_mark_node_1 = node1.mark_log()
              log_mark_node_2 = node2.mark_log()
          
              def endpoint_downtime(node_to_query, node):
                  mbean = make_mbean('net', type='Gossiper')
                  with JolokiaAgent(node_to_query) as jmx:
                      return jmx.execute_method(mbean, 'getEndpointDowntime(java.lang.String)', [node])
          
              while endpoint_downtime(node1, "127.0.0.2") <= 5000:
                  time.sleep(1)
          
              # Second round of inserts. We do not expect hints to be stored.
              insert_c1c2(session, n=(100, 200), consistency=ConsistencyLevel.ONE)
          
              # Restart node1. Prior to CASSANDRA-14215 this would reset node1's hint window.
              node1.stop()
              node1.start(wait_for_binary_proto=True, wait_other_notice=False)
              session = self.patient_exclusive_cql_connection(node1)
              session.execute('USE ks')
              # Third round of inserts. We do not expect hints to be stored.
              insert_c1c2(session, n=(200, 300), consistency=ConsistencyLevel.ONE)
          
              # Enable node2 and wait for hints to be replayed
              node2.nodetool('enablegossip')
              node2.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_2)
          
              node2.nodetool('enablebinary')
              node2.watch_log_for(["Starting listening for CQL clients"], timeout=120, from_mark=log_mark_node_2)
              node1.nodetool('resumehandoff')
              node1.watch_log_for('Finished hinted handoff')
              # Stop node1 so that we only query node2
              node1.stop()
          
              session = self.patient_exclusive_cql_connection(node2)
              session.execute('USE ks')
              # Ensure first dataset is present (through replayed hints)
              for x in range(0, 100):
                  query_c1c2(session, x, ConsistencyLevel.ONE)
          
              # Ensure second and third datasets are not present
              for x in range(100, 300):
      >           query_c1c2(session, x, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
      
      hintedhandoff_test.py:303: 
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      tools/data.py:47: in query_c1c2
          assertions.assert_length_equal(rows, 0)
      _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
      
      object_with_length = [Row(c1='value1', c2='value2')], expected_length = 0
      
          def assert_length_equal(object_with_length, expected_length):
              """
              Assert an object has a specific length.
              @param object_with_length The object whose length will be checked
              @param expected_length The expected length of the object
          
              Examples:
              assert_length_equal(res, nb_counter)
              """
              assert len(object_with_length) == expected_length, \
                  "Expected {} to have length {}, but instead is of length {}"\
      >           .format(object_with_length, expected_length, len(object_with_length))
      E       AssertionError: Expected [Row(c1='value1', c2='value2')] to have length 0, but instead is of length 1
      
      tools/assertions.py:269: AssertionError
      

       

      Attachments

        Issue Links

          Activity

            People

              stefan.miklosovic Stefan Miklosovic
              e.dimitrova Ekaterina Dimitrova
              Stefan Miklosovic
              Brandon Williams
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: