Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1244405) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -664,7 +664,7 @@ // Interrupt catalog tracker here in case any regions being opened out in // handlers are stuck waiting on meta or root. if (this.catalogTracker != null) this.catalogTracker.stop(); - if (this.fsOk) + if (!this.killed && this.fsOk) waitOnAllRegionsToClose(abortRequested); //fsOk flag may be changed when closing region throws exception. Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java (revision 0) @@ -0,0 +1,118 @@ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.TestMasterFailover; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; + +public class TestHRegionserverKilled { + private static final Log LOG = LogFactory.getLog(TestMasterFailover.class); + + @Test(timeout = 180000) + public void testDataCorrectnessWhenMasterFailOver() throws Exception { + final int NUM_MASTERS = 1; + final int NUM_RS = 2; + final byte[] TABLENAME = Bytes + .toBytes("testRegionCorrectnessWhenMasterFailOver"); + final byte[] FAMILY = Bytes.toBytes("family"); + final byte[][] SPLITKEYS = { Bytes.toBytes("split") }; + + // Create config to use for this cluster + Configuration conf = HBaseConfiguration.create(); + + // Start the cluster + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); + TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + + HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILY)); + HBaseAdmin hbaseAdmin = TEST_UTIL.getHBaseAdmin(); + hbaseAdmin.createTable(desc, SPLITKEYS); + + assertTrue(hbaseAdmin.isTableAvailable(TABLENAME)); + + + HTable table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + List puts = new ArrayList(); + Put put1 = new Put(Bytes.toBytes("1row")); + put1.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value")); + Put put2 = new Put(Bytes.toBytes("zrow")); + put2.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value")); + puts.add(put1); + puts.add(put2); + + table.put(puts); + + ResultScanner resultScanner = table.getScanner(new Scan()); + int count = 0; + while (resultScanner.next() != null) { + count++; + } + resultScanner.close(); + table.close(); + assertEquals(2, count); + + int metaServerNum = cluster.getServerWithMeta(); + + /* Starting test */ + + // First abort master + cluster.abortMaster(0); + cluster.getMasterThreads().get(0).join(); + System.out.println("Master is aborted"); + + cluster.getConfiguration().setInt("hbase.master.initialization.sleep", + 10000); + HMaster master = cluster.startMaster().getMaster(); + while (!master.isLogSplittedAfterStartup()) { + Thread.sleep(1000); + } + master.getRegionServerTracker().setDelayTime(60 * 1000); + System.out.println("splitted:" + master.isLogSplittedAfterStartup() + + ",initialized:" + master.isInitialized()); + + // Second kill meta server + cluster.getRegionServer(metaServerNum).kill(); + System.out.println("Killing regionserver"); + while (!master.isInitialized()) { + Thread.sleep(1000); + } + System.out.println("master isInitialized"); + // Third check whether data is correct in meta region + assertTrue(hbaseAdmin.isTableAvailable(TABLENAME)); + + table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + resultScanner = table.getScanner(new Scan()); + count = 0; + while (resultScanner.next() != null) { + count++; + } + resultScanner.close(); + table.close(); + assertEquals(2, count); + + TEST_UTIL.shutdownMiniCluster(); + + } + +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1244405) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -52,7 +52,6 @@ import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; -import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.Result; @@ -157,6 +156,8 @@ private volatile boolean isActiveMaster = false; // flag set after we complete initialization once active (used for testing) private volatile boolean initialized = false; + // flag set after we complete splitting log after startup (used for testing) + private volatile boolean logSplitted = false; // Instance of the hbase executor service. ExecutorService executorService; @@ -380,6 +381,12 @@ // TODO: Should do this in background rather than block master startup this.fileSystemManager. splitLogAfterStartup(this.serverManager.getOnlineServers()); + this.logSplitted = true; + // used for test + int sleepAfterSplitLog = conf.getInt("hbase.master.initialization.sleep", 0); + if (sleepAfterSplitLog > 0) { + Thread.sleep(sleepAfterSplitLog); + } // Make sure root and meta assigned before proceeding. assignRootAndMeta(); @@ -553,6 +560,10 @@ return this.zooKeeper; } + public RegionServerTracker getRegionServerTracker() { + return this.regionServerTracker; + } + /* * Start up all services. If any of these threads gets an unhandled exception * then they just die with a logged message. This should be fine because @@ -1058,6 +1069,15 @@ return initialized; } + /** + * This method is used for testing. + * + * @return true if log is splitted after startup. + */ + public boolean isLogSplittedAfterStartup() { + return logSplitted; + } + @Override public void assign(final byte [] regionName, final boolean force) throws IOException { @@ -1118,4 +1138,5 @@ public static void main(String [] args) throws Exception { new HMasterCommandLine(HMaster.class).doMain(args); } + } Index: src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (revision 1244405) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (working copy) @@ -44,6 +44,8 @@ private ServerManager serverManager; private Abortable abortable; + // used for test + private int delayTime = 0; public RegionServerTracker(ZooKeeperWatcher watcher, Abortable abortable, ServerManager serverManager) { @@ -64,9 +66,20 @@ ZKUtil.watchAndGetNewChildren(watcher, watcher.rsZNode); } + public void setDelayTime(int delayTime) { + this.delayTime = delayTime; + } + @Override public void nodeDeleted(String path) { if(path.startsWith(watcher.rsZNode)) { + if (delayTime > 0) { + // used for testing + try { + Thread.sleep(delayTime); + } catch (InterruptedException e) { + } + } String serverName = ZKUtil.getNodeName(path); LOG.info("RegionServer ephemeral node deleted, processing expiration [" + serverName + "]");