diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 61ce9f7..9702837 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -3081,7 +3081,8 @@ public class AssignmentManager extends ZooKeeperListener { regionStates.getRegionTransitionState(encodedName); if (regionState == null || (regionState.getServerName() != null && !regionState.isOnServer(sn)) - || !(regionState.isFailedClose() || regionState.isPendingOpenOrOpening())) { + || !(regionState.isFailedClose() || regionState.isPendingOpenOrOpening() || regionState + .isOffline())) { LOG.info("Skip " + regionState + " since it is not opening/failed_close" + " on the dead server any more: " + sn); it.remove(); diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index c09fe84..11bddbb 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -513,8 +513,8 @@ public class RegionStates { // pending open on this server, was open on another one. // It could be in failed_close state too if tried several times // to open it while the server is not reachable. 
- if (state.isPendingOpenOrOpening() || state.isFailedClose()) { - LOG.info("Found opening region " + state + " to be reassigned by SSH for " + sn); + if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) { + LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn); rits.add(hri); } else { LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index 532bb21..34844ed 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -29,6 +29,8 @@ import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -39,9 +41,11 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.RegionTransition; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; @@ -60,6 +64,7 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; import org.junit.AfterClass; 
import org.junit.BeforeClass; import org.junit.Test; @@ -70,6 +75,7 @@ import org.junit.experimental.categories.Category; */ @Category(MediumTests.class) public class TestAssignmentManagerOnCluster { + private static final Log LOG = LogFactory.getLog(TestAssignmentManagerOnCluster.class); private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private final static Configuration conf = TEST_UTIL.getConfiguration(); @@ -760,6 +766,66 @@ public class TestAssignmentManagerOnCluster { } } + /** + * Tests that a RIT in offline state gets re-assigned after a master restart + */ + @Test(timeout = 60000) + public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception { + final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart"); + final HRegionInfo hri = createTableAndGetOneRegion(table); + HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + RegionStates regionStates = master.getAssignmentManager().getRegionStates(); + ServerName serverName = regionStates.getRegionServerOfRegion(hri); + TEST_UTIL.assertRegionOnServer(hri, serverName, 200); + + ServerName dstName = null; + for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) { + if (!tmpServer.equals(serverName)) { + dstName = tmpServer; + break; + } + } + // find a different server + assertTrue(dstName != null); + // shutdown HBase cluster + TEST_UTIL.shutdownMiniHBaseCluster(); + // create a RIT node in offline state + ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher(); + ZKAssign.createNodeOffline(zkw, hri, dstName); + Stat stat = new Stat(); + byte[] data = + ZKAssign.getDataNoWatch(TEST_UTIL.getZooKeeperWatcher(), hri.getEncodedName(), stat); + assertTrue(data != null); + RegionTransition rt = RegionTransition.parseFrom(data); + assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE); + + LOG.info(hri.getEncodedName() + " region is in 
offline state with source server=" + serverName + + " and dst server=" + dstName); + + // start HBase cluster + TEST_UTIL.startMiniHBaseCluster(1, 4, MyMaster.class, null); + + // wait until the master is initialized and no dead servers are still being processed + TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + HMaster master = TEST_UTIL.getHBaseCluster().getMaster(); + if (master != null && master.isInitialized()) { + ServerManager serverManager = master.getServerManager(); + return !serverManager.areDeadServersInProgress(); + } + return false; + } + }); + + // verify the region is assigned + master = TEST_UTIL.getHBaseCluster().getMaster(); + master.getAssignmentManager().waitForAssignment(hri); + regionStates = master.getAssignmentManager().getRegionStates(); + RegionState newState = regionStates.getRegionState(hri); + assertTrue(newState.isOpened()); + } + static class MyLoadBalancer extends StochasticLoadBalancer { // For this region, if specified, always assign to nowhere static volatile String controledRegion = null;