diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index dcbce23..6893a9e 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -394,7 +394,7 @@ public class AssignmentManager { * @throws IOException * @throws KeeperException * @throws InterruptedException - * @throws CoordinatedStateException + * @throws CoordinatedStateException */ void joinCluster() throws IOException, KeeperException, InterruptedException, CoordinatedStateException { @@ -891,12 +891,16 @@ public class AssignmentManager { LOG.warn("Server " + server + " region CLOSE RPC returned false for " + region.getRegionNameAsString()); } catch (Throwable t) { + long sleepTime = 0; + Configuration conf = this.server.getConfiguration(); if (t instanceof RemoteException) { t = ((RemoteException)t).unwrapRemoteException(); } - if (t instanceof NotServingRegionException + if (t instanceof RegionServerAbortedException || t instanceof RegionServerStoppedException || t instanceof ServerNotRunningYetException) { + + } else if (t instanceof NotServingRegionException) { LOG.debug("Offline " + region.getRegionNameAsString() + ", it's not any more on " + server, t); regionStates.updateRegionState(region, State.OFFLINE); @@ -904,22 +908,23 @@ public class AssignmentManager { } else if (t instanceof FailedServerException && i < maximumAttempts) { // In case the server is in the failed server list, no point to // retry too soon. Retry after the failed_server_expiry time - try { - Configuration conf = this.server.getConfiguration(); - long sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, - RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); - if (LOG.isDebugEnabled()) { - LOG.debug(server + " is on failed server list; waiting " + sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, + RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); + if (LOG.isDebugEnabled()) { + LOG.debug(server + " is on failed server list; waiting " + sleepTime + "ms", t); - } + } + } + try { + if (sleepTime > 0) { Thread.sleep(sleepTime); - } catch (InterruptedException ie) { - LOG.warn("Failed to unassign " - + region.getRegionNameAsString() + " since interrupted", ie); - regionStates.updateRegionState(region, State.FAILED_CLOSE); - Thread.currentThread().interrupt(); - return; } + } catch (InterruptedException ie) { + LOG.warn("Failed to unassign " + + region.getRegionNameAsString() + " since interrupted", ie); + regionStates.updateRegionState(region, State.FAILED_CLOSE); + Thread.currentThread().interrupt(); + return; } LOG.info("Server " + server + " returned " + t + " for " diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 6b68bfe..264e62f 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -982,12 +982,6 @@ public class TestAssignmentManagerOnCluster { assertTrue(regionStates.isRegionOnline(hri)); assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); - // Try to unassign the dead region before SSH - am.unassign(hri); - // The region should be moved to offline since the server is dead - RegionState state = regionStates.getRegionState(hri); - assertTrue(state.isOffline()); - // Kill the hosting server, which doesn't have meta on it. cluster.killRegionServer(oldServerName); cluster.waitForRegionServerToStop(oldServerName, -1); @@ -1061,12 +1055,6 @@ public class TestAssignmentManagerOnCluster { assertTrue(regionStates.isRegionOnline(hri)); assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); - // Try to unassign the dead region before SSH - am.unassign(hri); - // The region should be moved to offline since the server is dead - RegionState state = regionStates.getRegionState(hri); - assertTrue(state.isOffline()); - // Disable the table now. master.disableTable(hri.getTable());