From d09031d7a68485ae5f5fd0a1e1312449b12d303a Mon Sep 17 00:00:00 2001
From: Duo Zhang
Date: Thu, 23 Aug 2018 17:06:01 +0800
Subject: [PATCH] HBASE-21095 The timeout retry logic for several procedures
 are broken after master restarts

---
 .../master/assignment/AssignmentManager.java  | 67 ++++++++++++++-----
 .../TransitRegionStateProcedure.java          | 19 +++++-
 .../procedure/ServerCrashProcedure.java       | 15 +++--
 .../TestCloseRegionWhileRSCrash.java          | 11 ++-
 4 files changed, 87 insertions(+), 25 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 9b020c811f..a91f8e45a4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1414,12 +1414,25 @@ public class AssignmentManager implements ServerListener {
   // Region Status update
   // Should only be called in TransitRegionStateProcedure
   // ============================================================================================
+  private void transitStateAndUpdate(RegionStateNode regionNode, RegionState.State newState,
+      RegionState.State... expectedStates) throws IOException {
+    RegionState.State state = regionNode.getState();
+    regionNode.transitionState(newState, expectedStates);
+    boolean succ = false;
+    try {
+      regionStateStore.updateRegionLocation(regionNode);
+      succ = true;
+    } finally {
+      if (!succ) {
+        // revert
+        regionNode.setState(state);
+      }
+    }
+  }

   // should be called within the synchronized block of RegionStateNode
   void regionOpening(RegionStateNode regionNode) throws IOException {
-    regionNode.transitionState(State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN);
-    regionStateStore.updateRegionLocation(regionNode);
-
+    transitStateAndUpdate(regionNode, State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN);
     regionStates.addRegionToServer(regionNode);
     // update the operation count metrics
     metrics.incrementOperationCounter();
@@ -1429,23 +1442,33 @@ public class AssignmentManager implements ServerListener {
   // The parameter 'giveUp' means whether we will give up and stop trying to open the region
   // again; if it is true, then we will persist the FAILED_OPEN state into hbase:meta.
   void regionFailedOpen(RegionStateNode regionNode, boolean giveUp) throws IOException {
-    if (regionNode.getRegionLocation() != null) {
-      regionStates.removeRegionFromServer(regionNode.getRegionLocation(), regionNode);
-    }
+    RegionState.State state = regionNode.getState();
+    ServerName regionLocation = regionNode.getRegionLocation();
     if (giveUp) {
       regionNode.setState(State.FAILED_OPEN);
       regionNode.setRegionLocation(null);
-      regionStateStore.updateRegionLocation(regionNode);
+      boolean succ = false;
+      try {
+        regionStateStore.updateRegionLocation(regionNode);
+        succ = true;
+      } finally {
+        if (!succ) {
+          // revert
+          regionNode.setState(state);
+          regionNode.setRegionLocation(regionLocation);
+        }
+      }
+    }
+    if (regionLocation != null) {
+      regionStates.removeRegionFromServer(regionLocation, regionNode);
     }
   }

   // should be called within the synchronized block of RegionStateNode
   void regionOpened(RegionStateNode regionNode) throws IOException {
-    regionNode.transitionState(State.OPEN, RegionStates.STATES_EXPECTED_ON_OPEN);
     // TODO: OPENING Updates hbase:meta too... we need to do both here and there?
     // That is a lot of hbase:meta writing.
-    regionStateStore.updateRegionLocation(regionNode);
-
+    transitStateAndUpdate(regionNode, State.OPEN, RegionStates.STATES_EXPECTED_ON_OPEN);
     RegionInfo hri = regionNode.getRegionInfo();
     if (isMetaRegion(hri)) {
       // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
@@ -1460,7 +1483,7 @@ public class AssignmentManager implements ServerListener {

   // should be called within the synchronized block of RegionStateNode
   void regionClosing(RegionStateNode regionNode) throws IOException {
-    regionNode.transitionState(State.CLOSING, RegionStates.STATES_EXPECTED_ON_CLOSE);
+    transitStateAndUpdate(regionNode, State.CLOSING, RegionStates.STATES_EXPECTED_ON_CLOSE);
     regionStateStore.updateRegionLocation(regionNode);

     RegionInfo hri = regionNode.getRegionInfo();
@@ -1477,16 +1500,26 @@ public class AssignmentManager implements ServerListener {
   // The parameter 'normally' means whether we are closed cleanly, if it is false, then it means
   // that we are closed due to a RS crash.
   void regionClosed(RegionStateNode regionNode, boolean normally) throws IOException {
+    RegionState.State state = regionNode.getState();
+    ServerName regionLocation = regionNode.getRegionLocation();
     regionNode.transitionState(normally ? State.CLOSED : State.ABNORMALLY_CLOSED,
       RegionStates.STATES_EXPECTED_ON_CLOSE);
-    ServerName loc = regionNode.getRegionLocation();
-    if (loc != null) {
-      // could be a retry so add a check here to avoid set the lastHost to null.
-      regionNode.setLastHost(loc);
+    boolean succ = false;
+    try {
+      regionStateStore.updateRegionLocation(regionNode);
+      succ = true;
+    } finally {
+      if (!succ) {
+        // revert
+        regionNode.setState(state);
+        regionNode.setRegionLocation(regionLocation);
+      }
+    }
+    if (regionLocation != null) {
+      regionNode.setLastHost(regionLocation);
       regionNode.setRegionLocation(null);
-      regionStates.removeRegionFromServer(loc, regionNode);
+      regionStates.removeRegionFromServer(regionLocation, regionNode);
     }
-    regionStateStore.updateRegionLocation(regionNode);
   }

   public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName,
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
index e853b9bbac..05a53ab8af 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/TransitRegionStateProcedure.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
 import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
 import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
@@ -42,6 +43,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionStateData;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;

 /**
@@ -99,6 +101,8 @@ public class TransitRegionStateProcedure

   private static final Logger LOG = LoggerFactory.getLogger(TransitRegionStateProcedure.class);

+  private final ProcedureEvent retryingEvent = new ProcedureEvent<>(this);
+
   private RegionStateTransitionState initialState;

   private RegionStateTransitionState lastState;
@@ -326,8 +330,8 @@ public class TransitRegionStateProcedure
       "Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed " +
         "by other Procedure or operator intervention",
       backoff / 1000, this, regionNode.toShortString(), e);
-    regionNode.getProcedureEvent().suspend();
-    if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
+    retryingEvent.suspend();
+    if (retryingEvent.suspendIfNotReady(this)) {
       setTimeout(Math.toIntExact(backoff));
       setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
       throw new ProcedureSuspendedException();
@@ -342,10 +346,19 @@ public class TransitRegionStateProcedure
   @Override
   protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
     setState(ProcedureProtos.ProcedureState.RUNNABLE);
-    getRegionStateNode(env).getProcedureEvent().wake(env.getProcedureScheduler());
+    retryingEvent.wake(env.getProcedureScheduler());
     return false; // 'false' means that this procedure handled the timeout
   }

+  @Override
+  protected void afterReplay(MasterProcedureEnv env) {
+    if (getState() == ProcedureState.WAITING_TIMEOUT) {
+      // restore the suspend state
+      retryingEvent.suspend();
+      retryingEvent.suspendIfNotReady(this);
+    }
+  }
+
   private void reportTransitionOpened(MasterProcedureEnv env, RegionStateNode regionNode,
       ServerName serverName, TransitionCode code, long openSeqNum) throws IOException {
     switch (code) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
index db7a872c38..1fcc6eb6b9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java
@@ -114,6 +114,17 @@ public class ServerCrashProcedure
       notifiedDeadServer = true;
     }

+    switch (state) {
+      case SERVER_CRASH_START:
+      case SERVER_CRASH_SPLIT_META_LOGS:
+      case SERVER_CRASH_ASSIGN_META:
+        break;
+      default:
+        // If hbase:meta is not assigned, yield.
+        if (env.getAssignmentManager().waitMetaLoaded(this)) {
+          throw new ProcedureSuspendedException();
+        }
+    }
     try {
       switch (state) {
         case SERVER_CRASH_START:
@@ -134,10 +145,6 @@ public class ServerCrashProcedure
           setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS);
           break;
         case SERVER_CRASH_GET_REGIONS:
-          // If hbase:meta is not assigned, yield.
-          if (env.getAssignmentManager().waitMetaLoaded(this)) {
-            throw new ProcedureSuspendedException();
-          }
           this.regionsOnCrashedServer =
             services.getAssignmentManager().getRegionStates().getServerRegionInfoSet(serverName);
           // Where to go next? Depends on whether we should split logs at all or
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java
index 9b1f4ca52e..3573bd661e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.master.ServerManager;
 import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
 import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
 import org.apache.hadoop.hbase.master.procedure.ServerProcedureInterface;
@@ -149,6 +150,7 @@ public class TestCloseRegionWhileRSCrash {

   @BeforeClass
   public static void setUp() throws Exception {
+    UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
     UTIL.startMiniCluster(3);
     UTIL.createTable(TABLE_NAME, CF);
     UTIL.getAdmin().balancerSwitch(false, true);
@@ -174,7 +176,7 @@ public class TestCloseRegionWhileRSCrash {
     HRegionServer dstRs = UTIL.getOtherRegionServer(srcRs);
     ProcedureExecutor<MasterProcedureEnv> procExec =
       UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
-    procExec.submitProcedure(new DummyServerProcedure(srcRs.getServerName()));
+    long dummyProcId = procExec.submitProcedure(new DummyServerProcedure(srcRs.getServerName()));
     ARRIVE.await();
     UTIL.getMiniHBaseCluster().killRegionServer(srcRs.getServerName());
     UTIL.waitFor(30000,
@@ -206,7 +208,14 @@ public class TestCloseRegionWhileRSCrash {
       }
       Thread.sleep(1000);
     }
+    // let's close the connection to make sure that the SCP can not update meta successfully
+    UTIL.getMiniHBaseCluster().getMaster().getConnection().close();
     RESUME.countDown();
+    UTIL.waitFor(30000, () -> procExec.isFinished(dummyProcId));
+    Thread.sleep(2000);
+    // here we restart
+    UTIL.getMiniHBaseCluster().stopMaster(0).join();
+    UTIL.getMiniHBaseCluster().startMaster();
     t.join();
     // Make sure that the region is online, it may not be on the original target server, as we will set
     // forceNewPlan to true if there is a server crash
-- 
2.17.1
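
The recurring pattern in the new code above (transitStateAndUpdate and the inlined variants in regionFailedOpen and regionClosed) is update-then-revert: mutate the in-memory RegionStateNode first, persist the change to hbase:meta, and roll the in-memory change back if the persist throws, so a later retry starts from a consistent state. The sketch below is a minimal, standalone illustration of that pattern, not HBase code; RegionNode, StateStore and PersistOrRevertSketch are hypothetical names invented for this example.

import java.io.IOException;

// Hypothetical stand-ins, invented for this sketch only; they are not HBase classes.
interface StateStore {
  void persist(String region, String state) throws IOException;
}

class RegionNode {
  private String state = "CLOSED";

  String getState() {
    return state;
  }

  void setState(String newState) {
    this.state = newState;
  }
}

public final class PersistOrRevertSketch {

  // Same shape as the patch's transitStateAndUpdate: change the in-memory state first,
  // then persist it; if persisting fails, put the old state back so memory and the
  // store never disagree and a later retry starts from a clean state.
  static void transitStateAndUpdate(RegionNode node, StateStore store, String region,
      String newState) throws IOException {
    String oldState = node.getState();
    node.setState(newState);
    boolean succ = false;
    try {
      store.persist(region, newState);
      succ = true;
    } finally {
      if (!succ) {
        // revert the in-memory transition, mirroring the patch's finally blocks
        node.setState(oldState);
      }
    }
  }

  public static void main(String[] args) {
    RegionNode node = new RegionNode();
    // A store that always fails, to exercise the revert path.
    StateStore failing = (region, state) -> {
      throw new IOException("cannot reach the backing store");
    };
    try {
      transitStateAndUpdate(node, failing, "r1", "OPENING");
    } catch (IOException expected) {
      // The failed transition was rolled back.
      System.out.println(node.getState()); // prints CLOSED
    }
  }
}

The success flag checked in finally covers any throwable from the store call without enumerating exception types, which matches the shape used throughout the patch.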