diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc b/src/kudu/integration-tests/raft_consensus-itest.cc
index 112f8d8..f7c380b 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -387,6 +387,60 @@ TEST_F(RaftConsensusITest, TestGetPermanentUuid) {
   ASSERT_EQ(expected_uuid, peer.permanent_uuid());
 }
 
+// Test that a client fails over when the leader times out.
+//
+// The test writes an initial batch of rows, pauses the tablet server that is
+// currently the leader, waits for a leader to be reported again, and then
+// verifies that the workload can insert 100 more rows while the original
+// leader is still paused.
+TEST_F(RaftConsensusITest, TestClientFailoverOnLeaderTimeout) {
+  LOG(INFO) << "Starting...";
+  BuildAndStart(vector<string>());
+  TestWorkload workload(cluster_.get());
+  workload.set_table_name(kTableId);
+  workload.Setup();
+  workload.Start();
+
+  // Remember which server we pause so that the same server is resumed later,
+  // regardless of which replica is the leader by then.
+  TServerDetails* paused_leader = NULL;
+  ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &paused_leader));
+  while (workload.rows_inserted() < 100) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  workload.StopAndJoin();
+
+  // Now pause the leader. This will cause the client to get timeout errors.
+  LOG(INFO) << "Pausing leader...";
+  ASSERT_OK(cluster_->tablet_server_by_uuid(paused_leader->uuid())->Pause());
+
+  // Wait for a new replica to be elected. The result goes into its own
+  // variable so it does not overwrite the handle of the paused server.
+  LOG(INFO) << "Waiting for new leader...";
+  TServerDetails* new_leader = NULL;
+  ASSERT_OK(GetLeaderReplicaWithRetries(tablet_id_, &new_leader));
+
+  // Write 100 more rows.
+  LOG(INFO) << "Attempting to write more rows to new leader...";
+  int rows_target = workload.rows_inserted() + 100;
+  workload.set_timeout_allowed(true);
+  workload.set_write_timeout_millis(500);
+  workload.Start();
+  // Poll up to 1000 times, sleeping 10ms between checks, for the target.
+  for (int i = 0; i < 1000; i++) {
+    if (workload.rows_inserted() >= rows_target) break;
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+  workload.StopAndJoin();
+
+  // Resume the server that was paused above before checking the result, so a
+  // failed assertion does not skip the resume.
+  ASSERT_OK(cluster_->tablet_server_by_uuid(paused_leader->uuid())->Resume());
+
+  // Report a shortfall as a test failure rather than aborting the process.
+  ASSERT_GE(workload.rows_inserted(), rows_target);
+}
+
 // TODO allow the scan to define an operation id, fetch the last id
 // from the leader and then use that id to make the replica wait
 // until it is done. This will avoid the sleeps below.