Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java (revision 1354326) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java (working copy) @@ -19,30 +19,47 @@ */ package org.apache.hadoop.hbase.master; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.DeserializationException; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.RegionTransition; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventHandler.EventHandlerListener; import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; +import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.monitoring.TaskMonitor; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.apache.hadoop.hbase.util.Pair; - -import java.util.List; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; - 
+import org.apache.hadoop.hbase.zookeeper.ZKAssign;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.junit.experimental.categories.Category;
 
 import com.google.common.base.Joiner;
-import org.junit.experimental.categories.Category;
 
-import static org.junit.Assert.*;
-
 @Category(MediumTests.class)
 public class TestMaster {
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
@@ -53,7 +70,7 @@
   @BeforeClass
   public static void beforeAllTests() throws Exception {
     // Start a cluster of two regionservers.
-    TEST_UTIL.startMiniCluster(1);
+    TEST_UTIL.startMiniCluster(2);
   }
 
   @AfterClass
@@ -62,6 +79,99 @@
   }
 
+  /**
+   * Verifies that calling {@code master.fixupDaughters} a second time for the
+   * same offlined parent still leaves daughter A online on exactly one region
+   * server.
+   * <p>
+   * While the second fixup runs, a helper thread polls daughter A's znode data
+   * through ZKAssign and, once it reports RS_ZK_REGION_OPENED, makes a new
+   * region plan for the daughter.
+   */
+  @Test
+  public void testFixupDaughtersTwice() throws Exception {
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    final HMaster master = cluster.getMaster();
+    byte[] tableName = Bytes.toBytes("TestFixupDaughtersTwice");
+    HTable ht = TEST_UTIL.createTable(tableName, FAMILYNAME);
+    // The handle is only needed to create the table; close it so it is not leaked.
+    ht.close();
+    HRegionInfo parent = cluster.getRegions(tableName).get(0).getRegionInfo();
+    byte[] splitKey = Bytes.toBytes("splitKey");
+    final HRegionInfo daughterA = new HRegionInfo(tableName, parent.getStartKey(),
+        splitKey);
+    HRegionInfo daughterB = new HRegionInfo(tableName, splitKey,
+        parent.getEndKey());
+    // Offline the parent in the catalog, recording both daughters, then run the
+    // first fixup.
+    MetaEditor.offlineParentInMeta(master.getCatalogTracker(), parent,
+        daughterA, daughterB);
+    MonitoredTask status = TaskMonitor.get().createStatus(
+        "TestFixupDaughtersTwice");
+    master.fixupDaughters(status);
+
+    Thread t = new Thread() {
+      @Override
+      public void run() {
+        RegionState daughterARegionState = master.getAssignmentManager()
+            .getRegionsInTransition().get(daughterA.getEncodedName());
+        // NOTE: a failure here is raised on this helper thread, not on the
+        // JUnit thread, so by itself it does not fail the test.
+        assertTrue(daughterARegionState != null);
+        long maxWaitTime = System.currentTimeMillis() + 20 * 1000;
+        synchronized (daughterARegionState) {
+          while (!daughterARegionState.isOpened()) {
+            try {
+              Thread.sleep(5);
+              byte[] data = ZKAssign.getDataNoWatch(master.getZooKeeper(),
+                  daughterA.getEncodedName(), new Stat());
+              if (data != null) {
+                RegionTransition rt = RegionTransition.parseFrom(data);
+                if (rt.getEventType() == EventHandler.EventType.RS_ZK_REGION_OPENED) {
+                  // Make a new region plan
+                  master.getAssignmentManager().getRegionPlan(daughterARegionState,
+                      daughterARegionState.getServerName(), true);
+                  return;
+                }
+              }
+              if (System.currentTimeMillis() > maxWaitTime) {
+                return;
+              }
+            } catch (Exception e) {
+              // Best effort: any error simply ends the helper.
+              return;
+            }
+          }
+        }
+      }
+    };
+    t.start();
+    // Fixup daughter twice, it may be caused by ServerShutdownHandler or master
+    // restart after last fixup
+    master.fixupDaughters(status);
+
+    final long maxWaitMs = 30 * 1000;
+    long startTime = System.currentTimeMillis();
+    while (master.getAssignmentManager().isRegionsInTransition()) {
+      Thread.sleep(50);
+      // Report the elapsed time, not the absolute deadline, in the failure text.
+      long elapsedMs = System.currentTimeMillis() - startTime;
+      if (elapsedMs > maxWaitMs) {
+        fail("Region has been in transition for " + elapsedMs
+            + "ms, it must be something wrong");
+      }
+    }
+    // Bounded wait so the helper thread does not outlive this test.
+    t.join(20 * 1000);
+    int daughterAAssigned = 0;
+    for (RegionServerThread rst : cluster.getRegionServerThreads()) {
+      if (rst.getRegionServer().getOnlineRegion(daughterA.getRegionName()) != null) {
+        daughterAAssigned++;
+      }
+    }
+    assertEquals(1, daughterAAssigned);
+  }
+
   @Test
   public void testMasterOpsWhileSplitting() throws Exception {
     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
     HMaster m = cluster.getMaster();
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1354326)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy)
@@ -439,7 +439,8 @@
     if (daughter == null) return 0;
     if (isDaughterMissing(catalogTracker, daughter)) {
       LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString());
-      MetaEditor.addDaughter(catalogTracker, daughter, null);
+      ServerName parentServerName = MetaReader.getServerNameFromCatalogResult(result);
+      MetaEditor.addDaughter(catalogTracker, daughter, parentServerName);
       // TODO: Log WARN if the regiondir does not exist in the fs. If its not
       // there then something wonky about the split -- things will keep going