diff --git hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAbortedException.java hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAbortedException.java new file mode 100644 index 0000000..ddc2270 --- /dev/null +++ hbase-client/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServerAbortedException.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; + +/** + * Thrown by the region server when it is aborting. Extends {@link RegionServerStoppedException}.
+ */ +@SuppressWarnings("serial") +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class RegionServerAbortedException extends RegionServerStoppedException { + public RegionServerAbortedException(String s) { + super(s); + } +} diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 2091809..e2c1a80 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -1873,14 +1873,14 @@ public class AssignmentManager extends ZooKeeperListener { t = ((RemoteException)t).unwrapRemoteException(); } boolean logRetries = true; - if (t instanceof RegionServerAbortedException) { - // RS is aborting, we cannot offline the region since the region may need to do WAL - // recovery. Until we see the RS expiration, we should retry. + if (t instanceof RegionServerAbortedException + || t instanceof RegionServerStoppedException) { + // RS is aborting or stopping, we cannot offline the region since the region may need + // to do WAL recovery. Until we see the RS expiration, we should retry. 
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY, RpcClient.FAILED_SERVER_EXPIRY_DEFAULT); } else if (t instanceof NotServingRegionException - || t instanceof RegionServerStoppedException || t instanceof ServerNotRunningYetException) { LOG.debug("Offline " + region.getRegionNameAsString() + ", it's not any more on " + server, t); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index e892ce7..689cac8 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -218,7 +218,7 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.deleteTable(Bytes.toBytes(table)); } } - + // Simulate a scenario where the AssignCallable and SSH are trying to assign a region @Test (timeout=60000) public void testAssignRegionBySSH() throws Exception { @@ -248,15 +248,15 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer); TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1); AssignmentManager am = master.getAssignmentManager(); - + // Simulate the AssignCallable trying to assign the region. 
Have the region in OFFLINE state, - // but not in transition and the server is the dead 'controlledServer' + // but not in transition and the server is the dead 'controlledServer' regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null); am.assign(hri, true, true); // Region should remain OFFLINE and go to transition assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState()); assertTrue (regionStates.isRegionInTransition(hri)); - + master.enableSSH(true); am.waitForAssignment(hri); assertTrue (regionStates.getRegionState(hri).isOpened()); @@ -336,7 +336,7 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName); TEST_UTIL.getMiniHBaseCluster().startMaster(); // Wait till master is active and is initialized - while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null || + while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null || !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) { Threads.sleep(1); } @@ -847,7 +847,7 @@ public class TestAssignmentManagerOnCluster { List regions = new ArrayList(); regions.add(hri); am.assign(destServerName, regions); - + // let region open continue MyRegionObserver.postOpenEnabled.set(false); @@ -1028,12 +1028,6 @@ public class TestAssignmentManagerOnCluster { assertTrue(regionStates.isRegionOnline(hri)); assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); - // Try to unassign the dead region before SSH - am.unassign(hri, false); - // The region should be moved to offline since the server is dead - RegionState state = regionStates.getRegionState(hri); - assertTrue(state.isOffline()); - // Kill the hosting server, which doesn't have meta on it. 
cluster.killRegionServer(oldServerName); cluster.waitForRegionServerToStop(oldServerName, -1); @@ -1159,12 +1153,6 @@ public class TestAssignmentManagerOnCluster { assertTrue(regionStates.isRegionOnline(hri)); assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri)); - // Try to unassign the dead region before SSH - am.unassign(hri, false); - // The region should be moved to offline since the server is dead - RegionState state = regionStates.getRegionState(hri); - assertTrue(state.isOffline()); - // Disable the table now. master.disableTable(hri.getTable());