Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1243831) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -945,6 +945,14 @@ LOG.debug("Bulk assigning done for " + destination.getServerName()); } + /** Returns true if hri maps to a server that isServerOnline reports as online. */ + public boolean isRegionOnline(HRegionInfo hri) { + HServerInfo hsi; + // processServerShutdown synchronizes on this.regions; read it under the same monitor. + synchronized (this.regions) { hsi = this.regions.get(hri); } + return hsi != null && this.isServerOnline(hsi.getServerName()); + } + protected void setEnabledTable(String tableName) { try { this.zkTable.setEnabledTable(tableName); @@ -2066,10 +2074,14 @@ /** * Process shutdown server removing any assignments. + * * @param hsi Server that went down. - * @return list of regions in transition on this server + * @return holder with the regions in transition on this server and the + * regions whose assignment plan targeted this server */ - public List processServerShutdown(final HServerInfo hsi) { + public RegionsWithDeadServer processServerShutdown(final HServerInfo hsi) { + RegionsWithDeadServer regionsWithDeadServer = new RegionsWithDeadServer(); + Set<HRegionInfo> regionPlanOnThisServer = new HashSet<HRegionInfo>(); // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { for (Iterator > i = @@ -2078,11 +2090,15 @@ HServerInfo otherHsi = e.getValue().getDestination(); // The HSI will be null if the region is planned for a random assign. if (otherHsi != null && otherHsi.equals(hsi)) { + // Remember the planned region; its plan is dropped just below. + regionPlanOnThisServer.add(e.getValue().getRegionInfo()); // Use iterator's remove else we'll get CME i.remove(); } } } + + regionsWithDeadServer.setRegionPlanOnThisServer(regionPlanOnThisServer); // TODO: Do we want to sync on RIT here? // Remove this server from map of servers to regions, and remove all regions // of this server from online map of regions. 
@@ -2091,8 +2107,9 @@ synchronized (this.regions) { Set assignedRegions = this.servers.remove(hsi); if (assignedRegions == null || assignedRegions.isEmpty()) { + regionsWithDeadServer.setRegionsInTransition(rits); // No regions on this server, we are done, return empty list of RITs - return rits; + return regionsWithDeadServer; } deadRegions = new TreeSet(assignedRegions); for (HRegionInfo region : deadRegions) { @@ -2109,7 +2126,8 @@ } } } - return rits; + regionsWithDeadServer.setRegionsInTransition(rits); + return regionsWithDeadServer; } /** @@ -2358,4 +2376,29 @@ public boolean isServerOnline(String serverName) { return this.serverManager.isServerOnline(serverName); } + + /** + * Holder for what processServerShutdown collected for a dead server. + */ + public static class RegionsWithDeadServer { + // Regions whose assignment plan had the dead server as its destination. + private Set<HRegionInfo> regionPlanOnThisServer = null; + private List<RegionState> regionsInTransition = null; + + public Set<HRegionInfo> getRegionPlanOnThisServer() { + return regionPlanOnThisServer; + } + + public void setRegionPlanOnThisServer(Set<HRegionInfo> regionPlanOnThisServer) { + this.regionPlanOnThisServer = regionPlanOnThisServer; + } + + public List<RegionState> getRegionsInTransition() { + return regionsInTransition; + } + + public void setRegionsInTransition(List<RegionState> regionsInTransition) { + this.regionsInTransition = regionsInTransition; + } + } } Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1243831) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -38,6 +39,7 @@ import 
org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionsWithDeadServer; import org.apache.hadoop.hbase.master.DeadServer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerManager; @@ -158,9 +160,13 @@ // doing after log splitting. Could do some states before -- OPENING? // OFFLINE? -- and then others after like CLOSING that depend on log // splitting. - List regionsInTransition = - this.services.getAssignmentManager().processServerShutdown(this.hsi); - + RegionsWithDeadServer regionsWithDeadServer = this.services + .getAssignmentManager().processServerShutdown(this.hsi); + Set<HRegionInfo> regionPlanOnThisServer = regionsWithDeadServer + .getRegionPlanOnThisServer(); + List<RegionState> regionsInTransition = regionsWithDeadServer + .getRegionsInTransition(); + // Assign root and meta if we were carrying them. if (isCarryingRoot()) { // -ROOT- LOG.info("Server " + serverName + " was carrying ROOT. 
Trying to assign."); @@ -207,32 +213,49 @@ " regions(s) that are already in transition)"); // Iterate regions that were on this server and assign them - for (Map.Entry e : hris.entrySet()) { - if (processDeadRegion(e.getKey(), e.getValue(), - this.services.getAssignmentManager(), - this.server.getCatalogTracker())) { - RegionState rit = this.services.getAssignmentManager() - .isRegionInTransition(e.getKey()); - Pair p = - this.services - .getAssignmentManager().getAssignment( - e.getKey().getEncodedNameAsBytes()); - - if (rit != null && !rit.isClosing() && !rit.isPendingClose()) { - // Skip regions that were in transition unless CLOSING or - // PENDING_CLOSE - LOG.info("Skip assigning region " + rit.toString()); - } else if ((p != null) && (p.getSecond() != null) - && (p.getSecond().equals(this.hsi))) { - LOG.debug("Skip assigning region " - + e.getKey().getRegionNameAsString() - + " because it has been opened in " - + p.getSecond()); - } else { - this.services.getAssignmentManager().assign(e.getKey(), true); + if (hris != null) { + for (Map.Entry e : hris.entrySet()) { + if (processDeadRegion(e.getKey(), e.getValue(), + this.services.getAssignmentManager(), + this.server.getCatalogTracker())) { + RegionState rit = this.services.getAssignmentManager() + .isRegionInTransition(e.getKey()); + Pair p = this.services + .getAssignmentManager().getAssignment( + e.getKey().getEncodedNameAsBytes()); + + if (rit != null && !rit.isClosing() && !rit.isPendingClose() + && (regionPlanOnThisServer == null + || !regionPlanOnThisServer.contains(rit.getRegion()))) { + // Skip regions already in transition unless CLOSING, PENDING_CLOSE, + // or planned to open on the dead server (a null plan set skips as before) + LOG.info("Skip assigning region " + rit.toString()); + } else if ((p != null) && (p.getSecond() != null) + && !(p.getSecond().equals(this.hsi))) { + LOG.debug("Skip assigning region " + + e.getKey().getRegionNameAsString() + + " because it has been opened in " + p.getSecond()); + } else { + 
this.services.getAssignmentManager().assign(e.getKey(), true); + if (regionPlanOnThisServer != null) { + regionPlanOnThisServer.remove(e.getKey()); + } + } } } } } + + int reassignedPlans = 0; + if (regionPlanOnThisServer != null) { + for (HRegionInfo hri : regionPlanOnThisServer) { + if (!this.services.getAssignmentManager().isRegionOnline(hri)) { + this.services.getAssignmentManager().assign(hri, true); + reassignedPlans++; + } + } + } + LOG.info(reassignedPlans + " regions which planned to open on " + + this.hsi.getServerName() + " be re-assigned."); } finally { this.deadServers.finish(serverName); }