From f66bd017473823ed10d40071360be71344550865 Mon Sep 17 00:00:00 2001 From: Sergey Shelukhin Date: Wed, 9 Jan 2019 10:37:59 +0800 Subject: [PATCH] HBASE-21614 RIT recovery with ServerCrashProcedure doesn't account for all regions --- .../org/apache/hadoop/hbase/master/RegionState.java | 9 +++++++++ .../hbase/master/assignment/AssignmentManager.java | 7 +++++-- .../hbase/master/assignment/RegionStateNode.java | 11 ++--------- .../hbase/master/procedure/ServerCrashProcedure.java | 3 +++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java index 745e1ea134..9e933661c8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java @@ -59,6 +59,15 @@ public class RegionState { // apply it to a region in this state, as it may lead to data loss as we // may have some data in recovered edits. + public boolean matches(State... expected) { + for (State state : expected) { + if (this == state) { + return true; + } + } + return false; + } + /** * Convert to protobuf ClusterStatusProtos.RegionState.State */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index b7c2203051..85fffc340b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -1293,14 +1293,17 @@ public class AssignmentManager { } RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); // Do not need to lock on regionNode, as we can make sure that before we finish loading - // meta, all the related procedures can not be executed. The only exception is formeta + // meta, all the related procedures can not be executed. The only exception is for meta // region related operations, but here we do not load the informations for meta region. regionNode.setState(localState); regionNode.setLastHost(lastHost); regionNode.setRegionLocation(regionLocation); regionNode.setOpenSeqNum(openSeqNum); - if (localState == State.OPEN) { + // Note: keep consistent with other methods, see region(Opening|Opened|Closing) + // RIT/ServerCrash handling should take care of the transiting regions. + if (localState.matches(State.OPEN, State.OPENING, State.CLOSING, State.SPLITTING, + State.MERGING)) { assert regionLocation != null : "found null region location for " + regionNode; regionStates.addRegionToServer(regionNode); } else if (localState == State.OFFLINE || regionInfo.isOffline()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java index 81e6f784e3..8f282f7fc8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java @@ -146,15 +146,8 @@ public class RegionStateNode implements Comparable { } } - public boolean isInState(final State... expected) { - if (expected != null && expected.length > 0) { - boolean expectedState = false; - for (int i = 0; i < expected.length; ++i) { - expectedState |= (getState() == expected[i]); - } - return expectedState; - } - return true; + public boolean isInState(State... expected) { + return getState().matches(expected); } public boolean isStuck() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 20727279c5..bee90cda2b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -172,6 +172,9 @@ public class ServerCrashProcedure services.getAssignmentManager().getRegionsOnServer(serverName); // Where to go next? Depends on whether we should split logs at all or // if we should do distributed log splitting. + if (regionsOnCrashedServer != null) { + LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size()); + } if (!this.shouldSplitWal) { setNextState(ServerCrashState.SERVER_CRASH_ASSIGN); } else { -- 2.17.1