Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -29,12 +29,12 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -69,7 +69,6 @@ import org.apache.hadoop.hbase.ipc.HMasterRegionInterface; import org.apache.hadoop.hbase.ipc.ProtocolSignature; import org.apache.hadoop.hbase.ipc.RpcServer; -import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator; import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DeleteTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; @@ -184,6 +183,8 @@ private volatile boolean isActiveMaster = false; // flag set after we complete initialization once active (used for testing) private volatile boolean initialized = false; + // flag set after we complete splitting log after startup (used for testing) + private volatile boolean logSplitted = false; // Instance of the hbase executor service. ExecutorService executorService; @@ -514,6 +515,12 @@ status.setStatus("Splitting logs after master startup"); this.fileSystemManager. splitLogAfterStartup(this.serverManager.getOnlineServers().keySet()); + this.logSplitted = true; + // used for test + int sleepAfterSplitLog = conf.getInt("hbase.master.initialization.sleep", 0); + if (sleepAfterSplitLog > 0) { + Thread.sleep(sleepAfterSplitLog); + } // Make sure root and meta assigned before proceeding. assignRootAndMeta(status); @@ -1638,7 +1645,16 @@ public boolean isInitialized() { return initialized; } - + + /** + * This method is used for testing. + * + * @return true if log is splitted after startup. + */ + public boolean isLogSplittedAfterStartup() { + return logSplitted; + } + @Override @Deprecated public void assign(final byte[] regionName, final boolean force) Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -83,7 +83,6 @@ import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.client.Action; import org.apache.hadoop.hbase.client.Append; -import org.apache.hadoop.hbase.client.RowMutation; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HConnectionManager; @@ -93,6 +92,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Row; +import org.apache.hadoop.hbase.client.RowMutation; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; @@ -756,7 +756,7 @@ // Interrupt catalog tracker here in case any regions being opened out in // handlers are stuck waiting on meta or root. if (this.catalogTracker != null) this.catalogTracker.stop(); - if (this.fsOk) { + if (!this.killed && this.fsOk) { waitOnAllRegionsToClose(abortRequested); LOG.info("stopping server " + this.serverNameFromMasterPOV + "; all regions closed."); Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java (revision 0) @@ -0,0 +1,123 @@ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.TestMasterFailover; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; + +public class TestHRegionserverKilled { + private static final Log LOG = LogFactory.getLog(TestMasterFailover.class); + + @Test(timeout = 180000) + public void testDataCorrectnessWhenMasterFailOver() throws Exception { + final int NUM_MASTERS = 1; + final int NUM_RS = 2; + final byte[] TABLENAME = Bytes + .toBytes("testRegionCorrectnessWhenMasterFailOver"); + final byte[] FAMILY = Bytes.toBytes("family"); + final byte[][] SPLITKEYS = { Bytes.toBytes("split") }; + + // Create config to use for this cluster + Configuration conf = HBaseConfiguration.create(); + + // Start the cluster + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); + TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + + HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILY)); + HBaseAdmin hbaseAdmin = TEST_UTIL.getHBaseAdmin(); + hbaseAdmin.createTable(desc, SPLITKEYS); + + assertTrue(hbaseAdmin.isTableAvailable(TABLENAME)); + + List regionList = hbaseAdmin.getTableRegions(TABLENAME); + for (HRegionInfo hri : regionList) { + System.out.println(hri); + } + + HTable table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + List puts = new ArrayList(); + Put put1 = new Put(Bytes.toBytes("1row")); + put1.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value")); + Put put2 = new Put(Bytes.toBytes("zrow")); + put2.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value")); + puts.add(put1); + puts.add(put2); + + table.put(puts); + + ResultScanner resultScanner = table.getScanner(new Scan()); + int count = 0; + while (resultScanner.next() != null) { + count++; + } + resultScanner.close(); + table.close(); + assertEquals(2, count); + + int metaServerNum = cluster.getServerWithMeta(); + + /* Starting test */ + + // First abort master + cluster.abortMaster(0); + cluster.getMasterThreads().get(0).join(); + System.out.println("Master is aborted"); + + cluster.getConfiguration().setInt("hbase.master.initialization.sleep", + 10000); + HMaster master = cluster.startMaster().getMaster(); + while (!master.isLogSplittedAfterStartup()) { + Thread.sleep(1000); + } + master.getRegionServerTracker().setDelayTime(60 * 1000); + System.out.println("splitted:" + master.isLogSplittedAfterStartup() + + ",initialized:" + master.isInitialized()); + + // Second kill meta server + cluster.getRegionServer(metaServerNum).kill(); + System.out.println("Killing regionserver"); + while (!master.isInitialized()) { + Thread.sleep(1000); + } + System.out.println("master isInitialized"); + // Third check whether data is correct in meta region + assertTrue(hbaseAdmin.isTableAvailable(TABLENAME)); + + table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + resultScanner = table.getScanner(new Scan()); + count = 0; + while (resultScanner.next() != null) { + count++; + } + resultScanner.close(); + table.close(); + assertEquals(2, count); + + TEST_UTIL.shutdownMiniCluster(); + + } + +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (working copy) @@ -47,6 +47,8 @@ private NavigableSet regionServers = new TreeSet(); private ServerManager serverManager; private Abortable abortable; + // used for test + private int delayTime = 0; public RegionServerTracker(ZooKeeperWatcher watcher, Abortable abortable, ServerManager serverManager) { @@ -86,9 +88,20 @@ } } + public void setDelayTime(int delayTime) { + this.delayTime = delayTime; + } + @Override public void nodeDeleted(String path) { if (path.startsWith(watcher.rsZNode)) { + if (delayTime > 0) { + // used for testing + try { + Thread.sleep(delayTime); + } catch (InterruptedException e) { + } + } String serverName = ZKUtil.getNodeName(path); LOG.info("RegionServer ephemeral node deleted, processing expiration [" + serverName + "]");