Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1332911) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -1381,11 +1381,12 @@ * Bulk assign regions to destination. * @param destination * @param regions Regions to assign. + * @return true if successful */ - void assign(final ServerName destination, + boolean assign(final ServerName destination, final List regions) { if (regions.size() == 0) { - return; + return true; } LOG.debug("Bulk assigning " + regions.size() + " region(s) to " + destination.toString()); @@ -1412,7 +1413,7 @@ new CreateUnassignedAsyncCallback(this.watcher, destination, counter); for (RegionState state: states) { if (!asyncSetOfflineInZooKeeper(state, cb, state)) { - return; + return false; } } // Wait until all unassigned nodes have been put up and watchers set. @@ -1443,7 +1444,7 @@ if (decodedException instanceof RegionServerStoppedException) { LOG.warn("The region server was shut down, ", decodedException); // No need to retry, the region server is a goner. - return; + return false; } else if (decodedException instanceof ServerNotRunningYetException) { // This is the one exception to retry. For all else we should just fail // the startup. @@ -1461,13 +1462,57 @@ // Can be a socket timeout, EOF, NoRouteToHost, etc LOG.info("Unable to communicate with the region server in order" + " to assign regions", e); + return false; } catch (InterruptedException e) { throw new RuntimeException(e); } LOG.debug("Bulk assigning done for " + destination.toString()); + return true; } /** + * Bulk assign regions to available servers if any with retry, else assign + * region singly. 
+   *
+   * <p>Each round asks the balancer for a round-robin plan over the remaining
+   * candidate servers and bulk assigns the planned regions server by server.
+   * A server whose bulk assignment returns false or throws is dropped from the
+   * candidates and its regions are retried in the next round. When the
+   * balancer returns no plan, or no candidate server is left, every pending
+   * region is assigned individually instead.
+   *
+   * @param regions all regions to assign
+   * @param servers all available servers; the list is copied and is never
+   *          modified by this method
+   */
+  public void quickAssign(List<HRegionInfo> regions, List<ServerName> servers) {
+    // Work on a private copy: failed servers are pruned between rounds and the
+    // caller's list (possibly shared or unmodifiable) must stay untouched.
+    List<ServerName> candidates = new ArrayList<ServerName>(servers);
+    List<HRegionInfo> pending = regions;
+    while (true) {
+      LOG.info("Quickly assigning " + pending.size() + " region(s) across "
+          + candidates.size() + " server(s)");
+      if (pending.isEmpty()) {
+        return;
+      }
+      // With no candidate left there is nothing to plan for; go straight to
+      // the single-assign fallback rather than relying on the balancer.
+      Map<ServerName, List<HRegionInfo>> bulkPlan = candidates.isEmpty()
+          ? null : balancer.roundRobinAssignment(pending, candidates);
+      if (bulkPlan == null) {
+        LOG.info("Failed getting bulk plan, assigning region singly");
+        for (HRegionInfo region : pending) {
+          assign(region, true);
+        }
+        return;
+      }
+      Map<ServerName, List<HRegionInfo>> failedPlans =
+          new HashMap<ServerName, List<HRegionInfo>>();
+      for (Map.Entry<ServerName, List<HRegionInfo>> e : bulkPlan.entrySet()) {
+        try {
+          if (!assign(e.getKey(), e.getValue())) {
+            failedPlans.put(e.getKey(), e.getValue());
+          }
+        } catch (Throwable t) {
+          // Any failure marks this server as unusable for the retry, but the
+          // cause is logged instead of being silently dropped.
+          LOG.warn("Failed bulk assigning " + e.getValue().size()
+              + " region(s) to " + e.getKey(), t);
+          failedPlans.put(e.getKey(), e.getValue());
+        }
+      }
+      if (failedPlans.isEmpty()) {
+        return;
+      }
+      // Retry the regions of every failed plan on the servers that are left.
+      candidates.removeAll(failedPlans.keySet());
+      List<HRegionInfo> reassigningRegions = new ArrayList<HRegionInfo>();
+      for (Map.Entry<ServerName, List<HRegionInfo>> e : failedPlans.entrySet()) {
+        LOG.info("Failed assigning " + e.getValue().size()
+            + " regions to server " + e.getKey() + ", reassigning them");
+        reassigningRegions.addAll(e.getValue());
+      }
+      pending = reassigningRegions;
+    }
+  }
+
+  /**
+   * Callback handler for create unassigned znodes used during bulk assign.
*/ static class CreateUnassignedAsyncCallback implements AsyncCallback.StringCallback { Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1332911) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.handler; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -285,6 +286,7 @@ // Iterate regions that were on this server and assign them if (hris != null) { + List toAssignRegions = new ArrayList(); for (Map.Entry e: hris.entrySet()) { if (processDeadRegion(e.getKey(), e.getValue(), this.services.getAssignmentManager(), @@ -303,10 +305,15 @@ + " because it has been opened in " + addressFromAM.getServerName()); } else { - this.services.getAssignmentManager().assign(e.getKey(), true); + toAssignRegions.add(e.getKey()); } } } + // Get all available servers + List availableServers = services.getServerManager() + .getOnlineServersList(); + this.services.getAssignmentManager().quickAssign(toAssignRegions, + availableServers); } } finally { this.deadServers.finish(serverName);