Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1230155)
+++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy)
@@ -20,7 +20,7 @@
 package org.apache.hadoop.hbase.master;
 
 import java.io.IOException;
-import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 
@@ -155,7 +155,7 @@
    * @param onlineServers Map of online servers keyed by
    * {@link HServerInfo#getServerName()}
    */
-  void splitLogAfterStartup(final Map<String, HServerInfo> onlineServers) {
+  void splitLogAfterStartup(final Set<String> onlineServers) {
     Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
     try {
       if (!this.fs.exists(logsDirPath)) {
@@ -176,7 +176,7 @@
     }
     for (FileStatus status : logFolders) {
       String serverName = status.getPath().getName();
-      if (onlineServers.get(serverName) == null) {
+      if (!onlineServers.contains(serverName)) {
         LOG.info("Log folder " + status.getPath() + " doesn't belong " +
           "to a known region server, splitting");
         splitLog(serverName);
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1230155)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy)
@@ -424,6 +424,10 @@
     return this.deadservers.clone();
   }
 
+  public Set<String> getDeadServersUnderProcessing() {
+    return this.deadservers.cloneProcessingDeadServers();
+  }
+
   /**
    * Checks if any dead servers are currently in progress.
    * @return true if any RS are being processed as dead, false if not
Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1230155)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -25,8 +25,10 @@
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.commons.logging.Log;
@@ -51,7 +53,6 @@
 import org.apache.hadoop.hbase.catalog.CatalogTracker;
 import org.apache.hadoop.hbase.catalog.MetaEditor;
 import org.apache.hadoop.hbase.catalog.MetaReader;
-import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
 import org.apache.hadoop.hbase.client.Result;
@@ -374,10 +375,17 @@
 
     // Wait for region servers to report in. Returns count of regions.
     int regionCount = this.serverManager.waitForRegionServers();
-    
+
+    Set<String> serversWithoutSplitLog = new HashSet<String>();
+    serversWithoutSplitLog.addAll(serverManager.getOnlineServers().keySet());
+    if (this.serverManager.areDeadServersInProgress()) {
+      // Dead servers are processing, their logs would be split by
+      // ServerShutdownHandler
+      serversWithoutSplitLog.addAll(serverManager.getDeadServersUnderProcessing());
+    }
     // TODO: Should do this in background rather than block master startup
     this.fileSystemManager.
-      splitLogAfterStartup(this.serverManager.getOnlineServers());
+      splitLogAfterStartup(serversWithoutSplitLog);
 
     // Make sure root and meta assigned before proceeding.
     assignRootAndMeta();
@@ -393,7 +401,7 @@
       this.assignmentManager.assignAllUserRegions();
     } else {
       LOG.info("Master startup proceeding: master failover");
-      this.assignmentManager.processFailover();
+      this.assignmentManager.processFailover(serversWithoutSplitLog);
     }
 
     // Start balancer and meta catalog janitor after meta and regions have
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1230155)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy)
@@ -193,11 +193,13 @@
   /**
    * Handle failover. Restore state from META and ZK. Handle any regions in
    * transition. Presumes .META. and -ROOT- deployed.
+   * @param onlineServers onlined servers when master start
    * @throws KeeperException
    * @throws IOException
    * @throws InterruptedException
    */
-  void processFailover() throws KeeperException, IOException, InterruptedException {
+  void processFailover(final Set<String> onlineServers)
+  throws KeeperException, IOException, InterruptedException {
     // Concurrency note: In the below the accesses on regionsInTransition are
     // outside of a synchronization block where usually all accesses to RIT are
     // synchronized. The presumption is that in this case it is safe since this
@@ -218,7 +220,7 @@
     // Scan META to build list of existing regions, servers, and assignment
     // Returns servers who have not checked in (assumed dead) and their regions
     Map<String, List<Pair<HRegionInfo, Result>>> deadServers =
-      rebuildUserRegions();
+      rebuildUserRegions(onlineServers);
     // Process list of dead servers; note this will add regions to the RIT.
     // processRegionsInTransition will read them and assign them out.
     processDeadServers(deadServers);
@@ -1560,12 +1562,15 @@
    * <p>
    * Returns a map of servers that are not found to be online and the regions
    * they were hosting.
+   * @param onlineServers if one region's location belongs to onlineServers, it
+   *          doesn't need to be assigned
    * @return map of servers not online to their assigned regions, as stored
    *         in META
    * @throws IOException
    * @throws KeeperException
    */
-  private Map<String, List<Pair<HRegionInfo, Result>>> rebuildUserRegions()
+  private Map<String, List<Pair<HRegionInfo, Result>>> rebuildUserRegions(
+      final Set<String> onlineServers)
   throws IOException, KeeperException {
     // Region assignment from META
     List<Result> results = MetaReader.fullScanOfResults(catalogTracker);
@@ -1592,7 +1597,7 @@
         if (checkIfRegionBelongsToDisabling(regionInfo)) {
           disablingTables.add(disablingTableName);
         }
-      } else if (!serverManager.isServerOnline(regionLocation.getServerName())) {
+      } else if (!onlineServers.contains(regionLocation.getServerName())) {
         // Region is located on a server that isn't online
         List<Pair<HRegionInfo, Result>> offlineRegions =
           offlineServers.get(regionLocation.getServerName());
Index: src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (revision 1230155)
+++ src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (working copy)
@@ -41,7 +41,8 @@
    * because by then, its regions have probably been reassigned.
    */
   private final Set<String> deadServers = new HashSet<String>();
-  
+  private final Set<String> deadServersUnderProcessing = new HashSet<String>();
+
   /** Maximum number of dead servers to keep track of */
   private final int maxDeadServers;
 
@@ -111,13 +112,22 @@
     return clone;
   }
 
+  public synchronized Set<String> cloneProcessingDeadServers() {
+    Set<String> clone = new HashSet<String>(
+        this.deadServersUnderProcessing.size());
+    clone.addAll(this.deadServersUnderProcessing);
+    return clone;
+  }
+
   public synchronized boolean add(String e) {
     this.numProcessing++;
+    deadServersUnderProcessing.add(e);
     return deadServers.add(e);
   }
 
   public synchronized void finish(String e) {
     this.numProcessing--;
+    deadServersUnderProcessing.remove(e);
   }
 
   public synchronized int size() {