From 0a1670898dd554b4d16ada1545a83fad8f1dcfae Mon Sep 17 00:00:00 2001 From: Sergey Shelukhin Date: Wed, 9 Jan 2019 10:37:59 +0800 Subject: [PATCH] HBASE-21614 RIT recovery with ServerCrashProcedure doesn't account for all regions --- .../hadoop/hbase/master/RegionState.java | 9 +++++++++ .../master/assignment/AssignmentManager.java | 7 +++++-- .../master/assignment/RegionStateNode.java | 18 ++++++++++-------- .../master/procedure/ServerCrashProcedure.java | 3 +++ 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java index 745e1ea134..9e933661c8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java @@ -59,6 +59,15 @@ public class RegionState { // apply it to a region in this state, as it may lead to data loss as we // may have some data in recovered edits. + public boolean matches(State... expected) { + for (State state : expected) { + if (this == state) { + return true; + } + } + return false; + } + /** * Convert to protobuf ClusterStatusProtos.RegionState.State */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index b7c2203051..85fffc340b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1293,14 +1293,17 @@ public class AssignmentManager { } RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); // Do not need to lock on regionNode, as we can make sure that before we finish loading - // meta, all the related procedures can not be executed. The only exception is formeta + // meta, all the related procedures can not be executed. The only exception is for meta // region related operations, but here we do not load the informations for meta region. regionNode.setState(localState); regionNode.setLastHost(lastHost); regionNode.setRegionLocation(regionLocation); regionNode.setOpenSeqNum(openSeqNum); - if (localState == State.OPEN) { + // Note: keep consistent with other methods, see region(Opening|Opened|Closing) + // RIT/ServerCrash handling should take care of the transiting regions. + if (localState.matches(State.OPEN, State.OPENING, State.CLOSING, State.SPLITTING, + State.MERGING)) { assert regionLocation != null : "found null region location for " + regionNode; regionStates.addRegionToServer(regionNode); } else if (localState == State.OFFLINE || regionInfo.isOffline()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java index 81e6f784e3..087d9bcf33 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java @@ -146,15 +146,17 @@ public class RegionStateNode implements Comparable { } } - public boolean isInState(final State... expected) { - if (expected != null && expected.length > 0) { - boolean expectedState = false; - for (int i = 0; i < expected.length; ++i) { - expectedState |= (getState() == expected[i]); - } - return expectedState; + /** + * Notice that, we will return true if {@code expected} is empty. + *

+ * This is a bit strange but we need this logic, for example, we can change the state to OPENING + * from any state, as in SCP we will not change the state to CLOSED before opening the region. + */ + public boolean isInState(State... expected) { + if (expected.length == 0) { + return true; } - return true; + return getState().matches(expected); } public boolean isStuck() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 20727279c5..bee90cda2b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -172,6 +172,9 @@ public class ServerCrashProcedure services.getAssignmentManager().getRegionsOnServer(serverName); // Where to go next? Depends on whether we should split logs at all or // if we should do distributed log splitting. + if (regionsOnCrashedServer != null) { + LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size()); + } if (!this.shouldSplitWal) { setNextState(ServerCrashState.SERVER_CRASH_ASSIGN); } else { -- 2.17.1