diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 79ffc8a..f21060a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -49,11 +49,13 @@ import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; -import org.apache.hadoop.hbase.client.ClusterConnection; -import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.RetriesExhaustedException; +import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.ipc.HBaseRpcController; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; +import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.assignment.RegionTransitionProcedure; +import org.apache.hadoop.hbase.master.procedure.ServerCrashException; +import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; @@ -572,6 +574,10 @@ public class ServerManager { if (!master.isServerCrashProcessingEnabled()) { LOG.info("Master doesn't enable ServerShutdownHandler during initialization, " + "delay expiring server " + serverName); + // Even though we delay expiring this server, we still need to handle any meta RIT + // that is up against the crashed server, because when RecoverMetaProcedure runs + // the SCP is not enabled yet and the meta RIT may stay suspended forever. 
See HBASE-19287
+      handleMetaRITOnCrashedServer(serverName);
       this.queuedDeadServers.add(serverName);
       return;
     }
@@ -603,6 +609,40 @@ public class ServerManager {
       }
     }
   }
+  /**
+   * Handles a region-in-transition (RIT) of the meta region against a crashed server.
+   * Only used when ServerCrashProcedure is not enabled.
+   *
+   * @param serverName the server that has crashed
+   */
+  public void handleMetaRITOnCrashedServer(ServerName serverName) {
+    RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
+        RegionInfoBuilder.FIRST_META_REGIONINFO, RegionInfo.DEFAULT_REPLICA_ID);
+    AssignmentManager am = master.getAssignmentManager();
+    RegionState regionState = am.getRegionStates().getRegionState(hri);
+    if (regionState == null) {
+      // Defensive: without a recorded state there is nothing to compare against.
+      LOG.warn("No region state found for " + hri + "; skip handling meta RIT for "
+          + serverName);
+      return;
+    }
+    ServerName metaServer = regionState.getServerName();
+    if (metaServer == null || !metaServer.equals(serverName)) {
+      // No server is recorded for meta, or meta is on a different server.
+      return;
+    }
+    // Meta has been assigned to the crashed server.
+    LOG.info("Meta has been assigned to crashed server: " + serverName + "; will do re-assign");
+    // Handle the failure and wake the event.
+    RegionTransitionProcedure rtp = am.getRegionStates().getRegionTransitionProcedure(hri);
+    // Only a running meta procedure in the REGION_TRANSITION_DISPATCH step is failed here;
+    // the REGION_TRANSITION_QUEUE step does not need to be considered.
+    if (rtp != null && rtp.isMeta()
+        && rtp.getTransitionState() == RegionTransitionState.REGION_TRANSITION_DISPATCH) {
+      rtp.remoteCallFailed(master.getMasterProcedureExecutor().getEnvironment(), serverName,
+          new ServerCrashException(rtp.getProcId(), serverName));
+    }
+  }
 
   @VisibleForTesting
   public void moveFromOnlineToDeadServers(final ServerName sn) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
index 6bb2cba..7c66b17 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java
@@ -138,7 +138,7 @@ public abstract class RegionTransitionProcedure
     this.transitionState = state;
   }
 
- RegionTransitionState getTransitionState() { + /** Returns the value of this procedure's {@code transitionState} field; made public in this change, which also adds a caller in ServerManager (a different package). */ public RegionTransitionState getTransitionState() { return transitionState; }