Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1182166) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -313,9 +313,7 @@ // Returns servers who have not checked in (assumed dead) and their regions Map>> deadServers = rebuildUserRegions(); - // Process list of dead servers; note this will add regions to the RIT. - // processRegionsInTransition will read them and assign them out. - processDeadServers(deadServers); + // Check existing regions in transition processRegionsInTransition(deadServers); @@ -372,12 +370,10 @@ // If we found user regions out on cluster, its a failover. if (regionsToProcess) { - LOG.info("Found regions out on cluster or in RIT; failover"); - if (!nodes.isEmpty()) { - for (String encodedRegionName: nodes) { - processRegionInTransition(encodedRegionName, null, deadServers); - } - } + LOG.info("Found regions out on cluster or in RIT; failover"); + // Process the regions of dead servers and the regions in RIT + recoverLostRegion(deadServers); + } else { // Fresh cluster startup. LOG.info("Clean cluster startup. Assigning userregions"); @@ -2222,10 +2218,10 @@ } /** - * Processes list of dead servers from result of META scan. + * Processes the list of dead servers from the META scan and the regions in RIT. *

- * This is used as part of failover to handle RegionServers which failed - * while there was no active master. + * This is used for failover to recover the lost regions that belong to + * RegionServers which failed while there was no active master or are in RIT. *

* Method stubs in-memory data to be as expected by the normal server shutdown * handler. @@ -2234,7 +2230,7 @@ * @throws IOException * @throws KeeperException */ - private void processDeadServers( + private void recoverLostRegion( Map>> deadServers) throws IOException, KeeperException { for (Map.Entry>> deadServer: @@ -2251,25 +2247,31 @@ // If zk node of this region has been updated by a live server, // we consider that this region is being handled. // So we should skip it and process it in processRegionsInTransition. - if (data != null && data.getOrigin() != null && - serverManager.isServerOnline(data.getOrigin())) { + if (data != null && data.getOrigin() != null + && serverManager.isServerOnline(data.getOrigin())) { LOG.info("The region " + regionInfo.getEncodedName() + "is being handled on " + data.getOrigin()); continue; } // Process with existing RS shutdown code - boolean assign = - ServerShutdownHandler.processDeadRegion(regionInfo, result, this, - this.catalogTracker); + boolean assign = ServerShutdownHandler.processDeadRegion(regionInfo, + result, this, this.catalogTracker); if (assign) { ZKAssign.createOrForceNodeOffline(watcher, regionInfo, - master.getServerName()); + master.getServerName()); } } catch (KeeperException.NoNodeException nne) { // This is fine } } } + List nodes = ZKUtil.listChildrenAndWatchForNewChildren(watcher, + watcher.assignmentZNode); + if (!nodes.isEmpty()) { + for (String encodedRegionName : nodes) { + processRegionInTransition(encodedRegionName, null, deadServers); + } + } } /*