Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -26,15 +26,16 @@ import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -69,7 +70,6 @@ import org.apache.hadoop.hbase.ipc.HMasterRegionInterface; import org.apache.hadoop.hbase.ipc.ProtocolSignature; import org.apache.hadoop.hbase.ipc.RpcServer; -import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator; import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DeleteTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; @@ -184,6 +184,8 @@ private volatile boolean isActiveMaster = false; // flag set after we complete initialization once active (used for testing) private volatile boolean initialized = false; + // flag set after we complete splitting log after startup (used for testing) + private volatile boolean logSplitted = false; // Instance of the hbase executor service. ExecutorService executorService; @@ -503,17 +505,34 @@ // Check zk for regionservers that are up but didn't register for (ServerName sn: this.regionServerTracker.getOnlineServers()) { if (!this.serverManager.isServerOnline(sn)) { - // Not registered; add it. - LOG.info("Registering server found up in zk but who has not yet " + - "reported in: " + sn); - this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD); + ServerName existingServer = ServerName.findServerWithSameHostnamePort( + serverManager.getOnlineServersList(), sn); + if (existingServer == null) { + // Not registered; add it. + LOG.info("Registering server found up in zk but who has not yet " + + "reported in: " + sn); + this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD); + } } } + Set knownServers = new HashSet(); + knownServers.addAll(serverManager.getOnlineServers().keySet()); + if (this.serverManager.areDeadServersInProgress()) { + // Dead servers are under processing, their logs would be split by + // ServerShutdownHandler + knownServers.addAll(serverManager.getDeadServersInProgress()); + } + // TODO: Should do this in background rather than block master startup status.setStatus("Splitting logs after master startup"); - this.fileSystemManager. - splitLogAfterStartup(this.serverManager.getOnlineServers().keySet()); + this.fileSystemManager.splitLogAfterStartup(knownServers); + this.logSplitted = true; + // used for test + int sleepAfterSplitLog = conf.getInt("hbase.master.initialization.sleep", 0); + if (sleepAfterSplitLog > 0) { + Thread.sleep(sleepAfterSplitLog); + } // Make sure root and meta assigned before proceeding. assignRootAndMeta(status); @@ -526,7 +545,7 @@ // Fixup assignment manager status status.setStatus("Starting assignment manager"); - this.assignmentManager.joinCluster(); + this.assignmentManager.joinCluster(knownServers); this.balancer.setClusterStatus(getClusterStatus()); this.balancer.setMasterServices(this); @@ -579,12 +598,16 @@ ServerName expiredServer = null; if (!catalogTracker.verifyRootRegionLocation(timeout)) { ServerName currentRootServer = this.catalogTracker.getRootLocation(); - if (expireIfOnline(currentRootServer)) { - // We are expiring this server. The processing of expiration will assign - // root so don't do it here. - expiredServer = currentRootServer; - } else { - // Root was not on an online server when we failed verification + // We are expiring root server if it is online. + if (!expireIfOnline(currentRootServer, true, false)) { + // CurrentRootServer is not online, but it may being processed as dead + // server. Before assign root, we need to wait until its log is + // splitted. + waitUntilNoLogDir(currentRootServer); + } + if (!this.serverManager.isDeadRootServerInProgress()) { + // No dead root server is being processed by (@link + // MetaServerShutdownHandler) this.assignmentManager.assignRoot(); } this.catalogTracker.waitForRoot(); @@ -606,11 +629,14 @@ if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) { ServerName currentMetaServer = this.catalogTracker.getMetaLocationOrReadLocationFromRoot(); - if (currentMetaServer != null && currentMetaServer.equals(expiredServer)) { - // We are expiring the server that is carrying meta already. - // The expiration processing will take care of reassigning meta. - expireIfOnline(currentMetaServer); - } else { + // We are expiring meta server if it is online. + if (!expireIfOnline(currentMetaServer, false, true)) { + // CurrentMetaServer is not online, but it may being processed as dead + // server. Before assign root, we need to wait until its log is + // splitted. + waitUntilNoLogDir(currentMetaServer); + } + if (!this.serverManager.isDeadMetaServerInProgress()) { this.assignmentManager.assignMeta(); } this.catalogTracker.waitForMeta(); @@ -664,17 +690,53 @@ /** * Expire a server if we find it is one of the online servers set. + * * @param sn ServerName to check. + * @param definitiveRootServer true if it's dead server carrying root certainly. + * @param definitiveMetaServer true if it's dead server carrying meta certainly. * @return True if server was online and so we expired it as unreachable. + * @throws IOException */ - private boolean expireIfOnline(final ServerName sn) { + private boolean expireIfOnline(final ServerName sn, + boolean definitiveRootServer, boolean definitiveMetaServer) throws IOException { if (sn == null) return false; if (!this.serverManager.isServerOnline(sn)) return false; LOG.info("Forcing expiration of " + sn); - this.serverManager.expireServer(sn); + if (definitiveRootServer || definitiveMetaServer) { + // We should split hlog in non-distributed fashion because of high + // priority since it is root or meta server. + this.fileSystemManager.splitLog(sn, true); + } + this.serverManager.expireServer(sn, definitiveRootServer, definitiveMetaServer); return true; } + /** + * Wait until server's log dir doesn't exist or time out. + * + */ + private void waitUntilNoLogDir(final ServerName serverName) + throws IOException, InterruptedException { + if (serverName != null) { + LOG.debug("Wait for " + serverName + " log dir to not exist: " + + this.fileSystemManager.logDirExists(serverName)); + } + int waitTime = conf.getInt("hbase.master.meta.assignment.timeout", 300000); + long start = System.currentTimeMillis(); + for (; serverName != null; ) { + if (this.fileSystemManager.logDirExists(serverName) == null) { + break; + } + if (System.currentTimeMillis() - start > waitTime) { + throw new RuntimeException( + "Timed out waiting to finish splitting log for " + serverName); + } + Thread.sleep(1000); + } + LOG.debug("Spent " + (System.currentTimeMillis() - start) + " ms waiting"); + + } + @Override public ProtocolSignature getProtocolSignature( String protocol, long version, int clientMethodsHashCode) @@ -1638,7 +1700,16 @@ public boolean isInitialized() { return initialized; } - + + /** + * This method is used for testing. + * + * @return true if log is splitted after startup. + */ + public boolean isLogSplittedAfterStartup() { + return logSplitted; + } + @Override @Deprecated public void assign(final byte[] regionName, final boolean force) Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -83,7 +83,6 @@ import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.client.Action; import org.apache.hadoop.hbase.client.Append; -import org.apache.hadoop.hbase.client.RowMutation; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HConnectionManager; @@ -93,6 +92,7 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Row; +import org.apache.hadoop.hbase.client.RowMutation; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.coprocessor.Exec; import org.apache.hadoop.hbase.client.coprocessor.ExecResult; @@ -756,7 +756,7 @@ // Interrupt catalog tracker here in case any regions being opened out in // handlers are stuck waiting on meta or root. if (this.catalogTracker != null) this.catalogTracker.stop(); - if (this.fsOk) { + if (!this.killed && this.fsOk) { waitOnAllRegionsToClose(abortRequested); LOG.info("stopping server " + this.serverNameFromMasterPOV + "; all regions closed."); Index: src/main/java/org/apache/hadoop/hbase/master/DeadServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (working copy) @@ -20,15 +20,19 @@ package org.apache.hadoop.hbase.master; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import org.apache.commons.lang.NotImplementedException; import org.apache.hadoop.hbase.ServerName; /** - * Class to hold dead servers list and utility querying dead server list. + * Class to hold dead servers list, utility querying dead server list and being + * processed dead servers by the ServerShutdownHandler. */ public class DeadServer implements Set { /** @@ -40,12 +44,28 @@ */ private final Set deadServers = new HashSet(); + private enum ServerType { + NORMAL, // Normal dead server + ROOT, // The dead server carried -ROOT- + META // The dead server carried .META. + } + + /** + * Dead servers under processing by the ServerShutdownHander. Map of + * ServerType to set of dead servers being processed + */ + private final Map> deadServersInProgress + = new HashMap>(); + /** Number of dead servers currently being processed */ private int numProcessing; public DeadServer() { super(); this.numProcessing = 0; + for (ServerType svrType : ServerType.values()) { + deadServersInProgress.put(svrType, new HashSet()); + } } /** @@ -95,19 +115,58 @@ return numProcessing != 0; } + /** + * @return true if root server is being processed as dead. + */ + public boolean isDeadRootServerInProgress() { + return !deadServersInProgress.get(ServerType.ROOT).isEmpty(); + } + + /** + * @return true if meta server is being processed as dead. + */ + public boolean isDeadMetaServerInProgress() { + return !deadServersInProgress.get(ServerType.META).isEmpty(); + } + public synchronized Set clone() { Set clone = new HashSet(this.deadServers.size()); clone.addAll(this.deadServers); return clone; } + synchronized Set getDeadServersInProgress() { + Set clone = new HashSet(); + clone.addAll(this.deadServersInProgress.get(ServerType.NORMAL)); + return clone; + } + public synchronized boolean add(ServerName e) { this.numProcessing++; + deadServersInProgress.get(ServerType.NORMAL).add(e); return deadServers.add(e); } + /** + * Add server to set of dead root servers if carryingRoot or set of dead meta + * servers if carryingMeta + */ + public synchronized void add(ServerName server, boolean carryingRoot, + boolean carryingMeta) { + if (carryingRoot) { + deadServersInProgress.get(ServerType.ROOT).add(server); + } + if (carryingMeta) { + deadServersInProgress.get(ServerType.META).add(server); + } + } + public synchronized void finish(ServerName e) { this.numProcessing--; + for (Entry> deadServerInProgress : deadServersInProgress + .entrySet()) { + deadServerInProgress.getValue().remove(e); + } } public synchronized int size() { Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -301,13 +301,15 @@ } /** - * Called on startup. - * Figures whether a fresh cluster start of we are joining extant running cluster. + * Called on startup. Figures whether a fresh cluster start or we are joining + * extant running cluster. + * @param onlineServers onlined servers when master start * @throws IOException * @throws KeeperException * @throws InterruptedException */ - void joinCluster() throws IOException, KeeperException, InterruptedException { + void joinCluster(final Set onlineServers) throws IOException, + KeeperException, InterruptedException { // Concurrency note: In the below the accesses on regionsInTransition are // outside of a synchronization block where usually all accesses to RIT are // synchronized. The presumption is that in this case it is safe since this @@ -318,8 +320,8 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions - Map>> deadServers = - rebuildUserRegions(); + Map>> deadServers = + rebuildUserRegions(onlineServers); processDeadServersAndRegionsInTransition(deadServers); @@ -2190,11 +2192,14 @@ *

* Returns a map of servers that are not found to be online and the regions * they were hosting. - * @return map of servers not online to their assigned regions, as stored - * in META + * @param onlineServers if one region's location belongs to onlineServers, it + * doesn't need to assign + * @return map of servers not online to their assigned regions, as stored in + * META * @throws IOException */ - Map>> rebuildUserRegions() + Map>> rebuildUserRegions( + final Set onlineServers) throws IOException, KeeperException { // Region assignment from META List results = MetaReader.fullScan(this.catalogTracker); @@ -2226,7 +2231,7 @@ } addTheTablesInPartialState(this.disablingTables, this.enablingTables, regionInfo, tableName); - } else if (!this.serverManager.isServerOnline(regionLocation)) { + } else if (!onlineServers.contains(regionLocation)) { // Region is located on a server that isn't online List> offlineRegions = offlineServers.get(regionLocation); Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionserverKilled.java (revision 0) @@ -0,0 +1,123 @@ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.TestMasterFailover; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; + +public class TestHRegionserverKilled { + private static final Log LOG = LogFactory.getLog(TestMasterFailover.class); + + @Test(timeout = 180000) + public void testDataCorrectnessWhenMasterFailOver() throws Exception { + final int NUM_MASTERS = 1; + final int NUM_RS = 2; + final byte[] TABLENAME = Bytes + .toBytes("testRegionCorrectnessWhenMasterFailOver"); + final byte[] FAMILY = Bytes.toBytes("family"); + final byte[][] SPLITKEYS = { Bytes.toBytes("split") }; + + // Create config to use for this cluster + Configuration conf = HBaseConfiguration.create(); + + // Start the cluster + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); + TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + + HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILY)); + HBaseAdmin hbaseAdmin = TEST_UTIL.getHBaseAdmin(); + hbaseAdmin.createTable(desc, SPLITKEYS); + + assertTrue(hbaseAdmin.isTableAvailable(TABLENAME)); + + List regionList = hbaseAdmin.getTableRegions(TABLENAME); + for (HRegionInfo hri : regionList) { + System.out.println(hri); + } + + HTable table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + List puts = new ArrayList(); + Put put1 = new Put(Bytes.toBytes("1row")); + put1.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value")); + Put put2 = new Put(Bytes.toBytes("zrow")); + put2.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value")); + puts.add(put1); + puts.add(put2); + + table.put(puts); + + ResultScanner resultScanner = table.getScanner(new Scan()); + int count = 0; + while (resultScanner.next() != null) { + count++; + } + resultScanner.close(); + table.close(); + assertEquals(2, count); + + int metaServerNum = cluster.getServerWithMeta(); + + /* Starting test */ + + // First abort master + cluster.abortMaster(0); + cluster.getMasterThreads().get(0).join(); + System.out.println("Master is aborted"); + + cluster.getConfiguration().setInt("hbase.master.initialization.sleep", + 10000); + HMaster master = cluster.startMaster().getMaster(); + while (!master.isLogSplittedAfterStartup()) { + Thread.sleep(1000); + } + master.getRegionServerTracker().setDelayTime(60 * 1000); + System.out.println("splitted:" + master.isLogSplittedAfterStartup() + + ",initialized:" + master.isInitialized()); + + // Second kill meta server + cluster.getRegionServer(metaServerNum).kill(); + System.out.println("Killing regionserver"); + while (!master.isInitialized()) { + Thread.sleep(1000); + } + System.out.println("master isInitialized"); + // Third check whether data is correct in meta region + assertTrue(hbaseAdmin.isTableAvailable(TABLENAME)); + + table = new HTable(TEST_UTIL.getConfiguration(), TABLENAME); + resultScanner = table.getScanner(new Scan()); + count = 0; + while (resultScanner.next() != null) { + count++; + } + resultScanner.close(); + table.close(); + assertEquals(2, count); + + TEST_UTIL.shutdownMiniCluster(); + + } + +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -238,14 +238,30 @@ } } while (retrySplitting); } - + public void splitLog(final ServerName serverName) throws IOException { + splitLog(serverName, false); + } + + /** + * @param forcedLocal true if we split log in a non-distributed fashion. + */ + public void splitLog(final ServerName serverName, boolean forcedLocal) + throws IOException { List serverNames = new ArrayList(); serverNames.add(serverName); - splitLog(serverNames); + splitLog(serverNames, forcedLocal); } public void splitLog(final List serverNames) throws IOException { + splitLog(serverNames, false); + } + + /** + * @param forcedLocal true if we split log in a non-distributed fashion. + */ + public void splitLog(final List serverNames, boolean forcedLocal) + throws IOException { long splitTime = 0, splitLogSize = 0; List logDirs = new ArrayList(); for(ServerName serverName: serverNames){ @@ -271,7 +287,7 @@ return; } - if (distributedLogSplitting) { + if (!forcedLocal && distributedLogSplitting) { splitLogManager.handleDeadWorkers(serverNames); splitTime = EnvironmentEdgeManager.currentTimeMillis(); splitLogSize = splitLogManager.splitLogDistributed(logDirs); @@ -521,4 +537,13 @@ this.services.getTableDescriptors().add(htd); return htd; } + + /** + * return path to log dir if server's log dir exists. Otherwise null + */ + public Path logDirExists(ServerName serverName) throws IOException { + Path serverLogDir = new Path(this.rootdir, + HLog.getHLogDirectoryName(serverName.toString())); + return this.fs.exists(serverLogDir) ? serverLogDir : null; + } } Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -45,10 +45,10 @@ import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HRegionInterface; -import org.apache.hadoop.hbase.regionserver.RegionOpeningState; import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.monitoring.MonitoredTask; +import org.apache.hadoop.hbase.regionserver.RegionOpeningState; /** * The ServerManager class manages info about region servers. @@ -177,7 +177,7 @@ if (existingServer.getStartcode() < serverName.getStartcode()) { LOG.info("Triggering server recovery; existingServer " + existingServer + " looks stale, new server:" + serverName); - expireServer(existingServer); + expireServer(existingServer, false, false); } throw new PleaseHoldException(message); } @@ -295,11 +295,30 @@ } } + /** + * @return Set of known dead servers. + */ public Set getDeadServers() { return this.deadservers.clone(); } /** + * @return Set of dead servers which are being processed by the + * ServerShutdownHander. + */ + Set getDeadServersInProgress() { + return this.deadservers.getDeadServersInProgress(); + } + + public boolean isDeadServerInProcess(ServerName server) { + if (!this.deadservers.areDeadServersInProgress()) { + return false; + } + return this.deadservers.getDeadServersInProgress().contains(server); + + } + + /** * Checks if any dead servers are currently in progress. * @return true if any RS are being processed as dead, false if not */ @@ -307,6 +326,20 @@ return this.deadservers.areDeadServersInProgress(); } + /** + * @return true if root server is being processed as dead. + */ + public boolean isDeadRootServerInProgress() { + return this.deadservers.isDeadRootServerInProgress(); + } + + /** + * @return true if meta server is being processed as dead. + */ + public boolean isDeadMetaServerInProgress() { + return this.deadservers.isDeadMetaServerInProgress(); + } + void letRegionServersShutdown() { long previousLogTime = 0; while (!onlineServers.isEmpty()) { @@ -342,11 +375,14 @@ .excludeRegionServerForSchemaChanges(serverName.getServerName()); } - /* - * Expire the passed server. Add it to list of deadservers and queue a + /** + * Expire the passed server. Add it to list of deadservers and queue a * shutdown processing. + * @param definitiveRootServer true if it's dead server carrying -ROOT- definitely + * @param definitiveMetaServer true if it's dead server carrying .META. definitely */ - public synchronized void expireServer(final ServerName serverName) { + public synchronized void expireServer(final ServerName serverName, + boolean definitiveRootServer, boolean definitiveMetaServer) { excludeRegionServerFromSchemaChanges(serverName); if (!this.onlineServers.containsKey(serverName)) { LOG.warn("Received expiration of " + serverName + @@ -379,9 +415,12 @@ return; } - boolean carryingRoot = services.getAssignmentManager().isCarryingRoot(serverName); - boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName); + boolean carryingRoot = definitiveRootServer ? definitiveRootServer : services + .getAssignmentManager().isCarryingRoot(serverName); + boolean carryingMeta = definitiveMetaServer ? definitiveMetaServer : services + .getAssignmentManager().isCarryingMeta(serverName); if (carryingRoot || carryingMeta) { + this.deadservers.add(serverName, carryingRoot, carryingMeta); this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master, this.services, this.deadservers, serverName, carryingRoot, carryingMeta)); } else { @@ -629,4 +668,5 @@ } } } + } Index: src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (revision 1244360) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (working copy) @@ -47,6 +47,8 @@ private NavigableSet regionServers = new TreeSet(); private ServerManager serverManager; private Abortable abortable; + // used for test + private int delayTime = 0; public RegionServerTracker(ZooKeeperWatcher watcher, Abortable abortable, ServerManager serverManager) { @@ -86,9 +88,20 @@ } } + public void setDelayTime(int delayTime) { + this.delayTime = delayTime; + } + @Override public void nodeDeleted(String path) { if (path.startsWith(watcher.rsZNode)) { + if (delayTime > 0) { + // used for testing + try { + Thread.sleep(delayTime); + } catch (InterruptedException e) { + } + } String serverName = ZKUtil.getNodeName(path); LOG.info("RegionServer ephemeral node deleted, processing expiration [" + serverName + "]"); @@ -98,7 +111,7 @@ return; } remove(sn); - this.serverManager.expireServer(sn); + this.serverManager.expireServer(sn, false, false); } }