Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1336799) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -344,7 +344,7 @@ * @throws KeeperException * @throws InterruptedException */ - void joinCluster(final Set onlineServers) throws IOException, + void joinCluster() throws IOException, KeeperException, InterruptedException { // Concurrency note: In the below the accesses on regionsInTransition are // outside of a synchronization block where usually all accesses to RIT are @@ -356,7 +356,7 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions - Map>> deadServers = rebuildUserRegions(onlineServers); + Map>> deadServers = rebuildUserRegions(); // This method will assign all user regions if a clean server startup or // it will reconstitute master state and cleanup any leftovers from @@ -370,16 +370,6 @@ } /** - * Only used for tests - * @throws IOException - * @throws KeeperException - * @throws InterruptedException - */ - void joinCluster() throws IOException, KeeperException, InterruptedException { - joinCluster(serverManager.getOnlineServers().keySet()); - } - - /** * Process all regions that are in transition up in zookeeper. Used by * master joining an already running cluster. 
* @throws KeeperException @@ -2518,11 +2508,12 @@ * in META * @throws IOException */ - Map>> rebuildUserRegions( - final Set onlineServers) + Map>> rebuildUserRegions() throws IOException, KeeperException { // Region assignment from META List results = MetaReader.fullScan(this.catalogTracker); + // Get any new but slow to checkin region server that joined the cluster + Set onlineServers = serverManager.getOnlineServers().keySet(); // Map of offline servers and their regions to be returned Map>> offlineServers = new TreeMap>>(); Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1336799) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -578,11 +578,10 @@ } this.assignmentManager.startTimeOutMonitor(); - Set onlineServers = new HashSet(serverManager - .getOnlineServers().keySet()); + // TODO: Should do this in background rather than block master startup status.setStatus("Splitting logs after master startup"); - splitLogAfterStartup(this.fileSystemManager, onlineServers); + splitLogAfterStartup(this.fileSystemManager); // Make sure root and meta assigned before proceeding. 
if (!assignRootAndMeta(status)) return; @@ -599,7 +598,7 @@ this.balancer.setMasterServices(this); // Fixup assignment manager status status.setStatus("Starting assignment manager"); - this.assignmentManager.joinCluster(onlineServers); + this.assignmentManager.joinCluster(); this.balancer.setClusterStatus(getClusterStatus()); @@ -635,9 +634,7 @@ * @param mfs - * @param onlineServers */ - protected void splitLogAfterStartup(final MasterFileSystem mfs, - Set onlineServers) { - mfs.splitLogAfterStartup(onlineServers); + protected void splitLogAfterStartup(final MasterFileSystem mfs) { + mfs.splitLogAfterStartup(); } /** Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1336799) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -188,7 +188,5 @@ - * @param onlineServers Set of online servers keyed by - * {@link ServerName} */ - void splitLogAfterStartup(final Set onlineServers) { + void splitLogAfterStartup() { boolean retrySplitting = !conf.getBoolean("hbase.hlog.split.skip.errors", HLog.SPLIT_SKIP_ERRORS_DEFAULT); Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); @@ -209,7 +207,8 @@ sn = sn.substring(0, sn.length() - HLog.SPLITTING_EXT.length()); } ServerName serverName = ServerName.parseServerName(sn); - if (!onlineServers.contains(serverName)) { + if (!((HMaster) master).getServerManager().getOnlineServers() + .keySet().contains(serverName)) { LOG.info("Log folder " + status.getPath() + " doesn't belong " + "to a known region server, splitting"); serverNames.add(serverName); Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1336799) +++ 
src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -200,7 +200,10 @@ existingServer + " looks stale, new server:" + serverName); expireServer(existingServer); } - throw new PleaseHoldException(message); + if (services.isServerShutdownHandlerEnabled()) { + // master has completed the initialization + throw new PleaseHoldException(message); + } } } Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 1336799) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (working copy) @@ -98,9 +98,8 @@ } @Override - protected void splitLogAfterStartup(MasterFileSystem mfs, - Set onlineServers) { - super.splitLogAfterStartup(mfs, onlineServers); + protected void splitLogAfterStartup(MasterFileSystem mfs) { + super.splitLogAfterStartup(mfs); logSplit = true; // If "TestingMaster.sleep" is set, sleep after log split. if (getConfiguration().getBoolean("TestingMaster.sleep", false)) {