From 04ebb827ff138152c074adb50c249bbec664ad23 Mon Sep 17 00:00:00 2001
From: Bahram Chehrazy
Date: Wed, 13 Feb 2019 14:41:19 -0800
Subject: [PATCH] Update master's in-memory state of meta as soon as the meta server dies

---
 .../master/assignment/AssignmentManager.java | 16 ++++++++++++++
 .../assignment/CloseRegionProcedure.java     |  7 ++++++-
 .../hadoop/hbase/TestMetaTableAccessor.java  | 11 ++++++++++
 .../assignment/TestAssignmentManager.java    | 21 +++++++++++++++++++
 4 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 2d0c3bee9f..2d7c70d533 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -1373,6 +1373,8 @@ public class AssignmentManager {
         return -1;
       } else {
         serverNode.setState(ServerState.CRASHED);
+        // If this server carries any system regions, mark them as CLOSING before starting the procedure.
+        regionsClosing(serverNode.getSystemRegionInfoList());
         pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(),
             serverName, shouldSplitWal, carryingMeta));
         LOG.info(
@@ -1562,6 +1564,20 @@ public class AssignmentManager {
     metrics.incrementOperationCounter();
   }
 
+  // Mark each region CLOSING under its RegionStateNode lock; IOExceptions are logged, not thrown.
+  void regionsClosing(List<RegionInfo> regions) {
+    regions.forEach(ri -> {
+      try {
+        RegionStateNode regionState = getRegionStates().getRegionStateNode(ri);
+        synchronized (regionState) {
+          regionClosing(regionState);
+        }
+      } catch (IOException ex) {
+        LOG.error("Failed to mark system region state to CLOSING {}", ri, ex);
+      }
+    });
+  }
+
   // should be called within the synchronized block of RegionStateNode
   // The parameter 'normally' means whether we are closed cleanly, if it is true, then it means that
   // we are closed due to a RS crash.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
index fd672fa036..330acc174f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java
@@ -28,12 +28,15 @@ import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperat
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CloseRegionProcedureStateData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * The remote procedure used to close a region.
  */
 @InterfaceAudience.Private
 public class CloseRegionProcedure extends RegionRemoteProcedureBase {
+  private static final Logger LOG = LoggerFactory.getLogger(CloseRegionProcedure.class);
 
   // For a region move operation, we will assign the region after we unassign it, this is the target
   // server for the subsequent assign. We will send this value to RS, and RS will record the region
@@ -83,6 +86,8 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase {
 
   @Override
   protected boolean shouldDispatch(RegionStateNode regionNode) {
-    return regionNode.isInState(RegionState.State.CLOSING);
+    boolean shouldDispatch = regionNode.isInState(RegionState.State.CLOSING);
+    LOG.info("CloseRegionProcedure will {}be dispatched for [{}] ", (shouldDispatch ? "" : "not "), regionNode);
+    return shouldDispatch;
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
index 5582178805..c7f89b6b86 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java
@@ -47,6 +47,7 @@ import org.apache.hadoop.hbase.ipc.DelegatingRpcScheduler;
 import org.apache.hadoop.hbase.ipc.PriorityFunction;
 import org.apache.hadoop.hbase.ipc.RpcScheduler;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.regionserver.SimpleRpcSchedulerFactory;
@@ -118,6 +119,16 @@ public class TestMetaTableAccessor {
     int index = UTIL.getMiniHBaseCluster().getServerWithMeta();
     HRegionServer rsWithMeta = UTIL.getMiniHBaseCluster().getRegionServer(index);
     rsWithMeta.abort("TESTING");
+
+    // Wait till the meta state goes to CLOSING or CLOSED state.
+    RegionState metaRegionState;
+    do {
+      Thread.sleep(1);
+      metaRegionState = m.getAssignmentManager().getRegionStates()
+          .getRegionState(RegionInfoBuilder.FIRST_META_REGIONINFO);
+    } while (!metaRegionState.isClosingOrClosedOnServer(rsWithMeta.getServerName()));
+
+    // Make sure meta recovers.
     assertTrue(m.waitForMetaOnline());
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
index 5ec7cc64e4..407d5b89d7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignmentManager.java
@@ -214,4 +214,25 @@ public class TestAssignmentManager extends TestAssignmentManagerBase {
     // set it back as default, see setUpMeta()
     am.wakeMetaLoadedEvent();
   }
+
+  @Test
+  public void testAssignMetaThenCrashServer() throws Exception {
+    util = new HBaseTestingUtility();
+    this.executor = Executors.newSingleThreadScheduledExecutor();
+    setupConfiguration(util.getConfiguration());
+    master = new MockMasterServices(util.getConfiguration(), this.regionsToRegionServers);
+    rsDispatcher = new MockRSProcedureDispatcher(master);
+    master.start(NSERVERS, rsDispatcher);
+    am = master.getAssignmentManager();
+
+    // Assign meta and confirm the assignment manager reports it as assigned.
+    rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
+    am.assign(RegionInfoBuilder.FIRST_META_REGIONINFO);
+    assertEquals(true, am.isMetaAssigned());
+
+    RegionStateNode metaRegion = am.getRegionStates().getRegionStateNode(RegionInfoBuilder.FIRST_META_REGIONINFO);
+    doCrash(metaRegion.getRegionLocation());
+    // Right after the crash is submitted, the meta region node is expected to be in CLOSING.
+    assertEquals(State.CLOSING, metaRegion.getState());
+  }
 }
-- 
2.20.1.windows.1