Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1243831) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -945,6 +945,27 @@ LOG.debug("Bulk assigning done for " + destination.getServerName()); } + public boolean isRegionOnline(HRegionInfo hri) { + HServerInfo hsi = null; + synchronized (this.regions) { + hsi = this.regions.get(hri); + if (hsi == null) { + return false; + } + if (this.isServerOnline(hsi.getServerName())) { + return true; + } else { + // Remove the assignment mapping for hsi. + Set hriSet = this.servers.get(hsi); + if (hriSet != null) { + hriSet.remove(hri); + } + this.regions.remove(hri); + return false; + } + } + } + protected void setEnabledTable(String tableName) { try { this.zkTable.setEnabledTable(tableName); @@ -2066,10 +2087,13 @@ /** * Process shutdown server removing any assignments. + * * @param hsi Server that went down. - * @return list of regions in transition on this server + * @return list of regions in transition and region plans on this server */ - public List processServerShutdown(final HServerInfo hsi) { + public RegionsOnDeadServer processServerShutdown(final HServerInfo hsi) { + RegionsOnDeadServer regionsOnDeadServer = new RegionsOnDeadServer(); + Set regionsFromRegionPlansForServer = new HashSet(); // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { for (Iterator > i = @@ -2078,11 +2102,16 @@ HServerInfo otherHsi = e.getValue().getDestination(); // The HSI will be null if the region is planned for a random assign. if (otherHsi != null && otherHsi.equals(hsi)) { + // Store the related regions in regionPlans. + regionsFromRegionPlansForServer.add(e.getValue().getRegionInfo()); // Use iterator's remove else we'll get CME i.remove(); } } } + + regionsOnDeadServer + .setRegionsFromRegionPlansForServer(regionsFromRegionPlansForServer); // TODO: Do we want to sync on RIT here? // Remove this server from map of servers to regions, and remove all regions // of this server from online map of regions. @@ -2091,8 +2120,9 @@ synchronized (this.regions) { Set assignedRegions = this.servers.remove(hsi); if (assignedRegions == null || assignedRegions.isEmpty()) { + regionsOnDeadServer.setRegionsInTransition(rits); // No regions on this server, we are done, return empty list of RITs - return rits; + return regionsOnDeadServer; } deadRegions = new TreeSet(assignedRegions); for (HRegionInfo region : deadRegions) { @@ -2109,7 +2139,8 @@ } } } - return rits; + regionsOnDeadServer.setRegionsInTransition(rits); + return regionsOnDeadServer; } /** @@ -2358,4 +2389,30 @@ public boolean isServerOnline(String serverName) { return this.serverManager.isServerOnline(serverName); } + + /** + * Store the related regions on a dead server, used by processServerShutdown. + */ + public static class RegionsOnDeadServer { + // The regions which being processed on this dead server. + private Set regionsFromRegionPlansForServer = null; + private List regionsInTransition = null; + + public Set getRegionsFromRegionPlansForServer() { + return regionsFromRegionPlansForServer; + } + + public void setRegionsFromRegionPlansForServer( + Set regionsFromRegionPlansForServer) { + this.regionsFromRegionPlansForServer = regionsFromRegionPlansForServer; + } + + public List getRegionsInTransition() { + return regionsInTransition; + } + + public void setRegionsInTransition(List regionsInTransition) { + this.regionsInTransition = regionsInTransition; + } + } } Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1243831) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -20,15 +20,17 @@ package org.apache.hadoop.hbase.master.handler; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.NavigableMap; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.catalog.CatalogTracker; @@ -38,6 +40,7 @@ import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionsOnDeadServer; import org.apache.hadoop.hbase.master.DeadServer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerManager; @@ -158,9 +161,14 @@ // doing after log splitting. Could do some states before -- OPENING? // OFFLINE? -- and then others after like CLOSING that depend on log // splitting. - List regionsInTransition = - this.services.getAssignmentManager().processServerShutdown(this.hsi); - + Set regionsFromRegionPlansForServer = new HashSet(); + List regionsInTransition = new ArrayList(); + RegionsOnDeadServer regionsOnDeadServer = this.services + .getAssignmentManager().processServerShutdown(this.hsi); + regionsFromRegionPlansForServer = regionsOnDeadServer + .getRegionsFromRegionPlansForServer(); + regionsInTransition = regionsOnDeadServer.getRegionsInTransition(); + // Assign root and meta if we were carrying them. if (isCarryingRoot()) { // -ROOT- LOG.info("Server " + serverName + " was carrying ROOT. Trying to assign."); @@ -207,32 +215,44 @@ " regions(s) that are already in transition)"); // Iterate regions that were on this server and assign them - for (Map.Entry e : hris.entrySet()) { - if (processDeadRegion(e.getKey(), e.getValue(), - this.services.getAssignmentManager(), - this.server.getCatalogTracker())) { - RegionState rit = this.services.getAssignmentManager() - .isRegionInTransition(e.getKey()); - Pair p = - this.services - .getAssignmentManager().getAssignment( - e.getKey().getEncodedNameAsBytes()); - - if (rit != null && !rit.isClosing() && !rit.isPendingClose()) { - // Skip regions that were in transition unless CLOSING or - // PENDING_CLOSE - LOG.info("Skip assigning region " + rit.toString()); - } else if ((p != null) && (p.getSecond() != null) - && (p.getSecond().equals(this.hsi))) { - LOG.debug("Skip assigning region " - + e.getKey().getRegionNameAsString() - + " because it has been opened in " - + p.getSecond()); - } else { - this.services.getAssignmentManager().assign(e.getKey(), true); + if (hris != null) { + for (Map.Entry e : hris.entrySet()) { + if (processDeadRegion(e.getKey(), e.getValue(), + this.services.getAssignmentManager(), + this.server.getCatalogTracker())) { + RegionState rit = this.services.getAssignmentManager() + .isRegionInTransition(e.getKey()); + Pair p = this.services + .getAssignmentManager().getAssignment( + e.getKey().getEncodedNameAsBytes()); + + if (rit != null && !rit.isClosing() && !rit.isPendingClose() + && !regionsFromRegionPlansForServer.contains(rit.getRegion())) { + // Skip regions that were in transition unless CLOSING or + // PENDING_CLOSE + LOG.info("Skip assigning region " + rit.toString()); + } else if ((p != null) && (p.getSecond() != null) + && !(p.getSecond().equals(this.hsi))) { + LOG.debug("Skip assigning region " + + e.getKey().getRegionNameAsString() + + " because it has been opened in " + p.getSecond()); + } else { + this.services.getAssignmentManager().assign(e.getKey(), true); + regionsFromRegionPlansForServer.remove(e.getKey()); + } } } } + + int reassignedPlans = 0; + for (HRegionInfo hri : regionsFromRegionPlansForServer) { + if (!this.services.getAssignmentManager().isRegionOnline(hri)) { + this.services.getAssignmentManager().assign(hri, true); + reassignedPlans++; + } + } + LOG.info(reassignedPlans + " regions which planned to open on " + + this.hsi.getServerName() + " be re-assigned."); } finally { this.deadServers.finish(serverName); }