Review notes (text before the first "Index:" line is ignored by patch):
 - Adds a configurable placement policy for the second daughter of a split
   ("hbase.daughter.region.placement": SAME_HOST or ONE_ON_LEAST_LOADED).
 - getAssignments() now also reports online servers without regions, so the
   equivalent loop is removed from HMaster.
 - The least loaded server is kept in a volatile instance field, compared by
   value, and an unknown policy name falls back to SAME_HOST with a warning.
 - NOTE(review): line breaks, indentation and generic type arguments of the
   context lines were reconstructed; confirm against the target revision.

Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1094812)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -743,14 +743,6 @@
 
       Map<HServerInfo, List<HRegionInfo>> assignments =
         this.assignmentManager.getAssignments();
-      // Returned Map from AM does not include mention of servers w/o assignments.
-      for (Map.Entry<String, HServerInfo> e:
-          this.serverManager.getOnlineServers().entrySet()) {
-        HServerInfo hsi = e.getValue();
-        if (!assignments.containsKey(hsi)) {
-          assignments.put(hsi, new ArrayList<HRegionInfo>());
-        }
-      }
       List<RegionPlan> plans = this.balancer.balanceCluster(assignments);
       int rpCount = 0; // number of RegionPlans balanced so far
       long totalRegPlanExecTime = 0;
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1095222)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy)
@@ -53,9 +53,9 @@
 import org.apache.hadoop.hbase.catalog.MetaReader;
 import org.apache.hadoop.hbase.catalog.RootLocationEditor;
 import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.executor.ExecutorService;
 import org.apache.hadoop.hbase.executor.RegionTransitionData;
+import org.apache.hadoop.hbase.executor.EventHandler.EventType;
 import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
@@ -68,9 +68,9 @@
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
-import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.zookeeper.AsyncCallback;
@@ -116,6 +116,17 @@
     new TreeMap<String, RegionPlan>();
 
   private final ZKTable zkTable;
+
+  /** Policy for placing the second daughter region after a split. */
+  public enum DaughterPlacement {
+    /** Keep both daughters on the server that hosted the parent. */
+    SAME_HOST,
+    /** Move the second daughter to the least loaded server last observed. */
+    ONE_ON_LEAST_LOADED
+  }
+
+  /** Daughter placement policy, read from configuration in the constructor. */
+  private DaughterPlacement daughterPlacement;
 
   /**
    * Server to regions assignment map.
@@ -166,6 +177,18 @@
     this.zkTable = new ZKTable(this.master.getZooKeeper());
     this.maximumAssignmentAttempts =
       this.master.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
+    // Resolve the policy from the master configuration (as the line above
+    // does) and fall back to SAME_HOST instead of failing on a bad value.
+    DaughterPlacement placement = DaughterPlacement.SAME_HOST;
+    String configured = this.master.getConfiguration().get(
+      "hbase.daughter.region.placement", placement.name());
+    try {
+      placement = DaughterPlacement.valueOf(configured.trim());
+    } catch (IllegalArgumentException e) {
+      LOG.warn("Unknown hbase.daughter.region.placement '" + configured +
+        "'; using " + placement);
+    }
+    this.daughterPlacement = placement;
   }
 
   /**
@@ -1102,7 +1125,6 @@
    */
   boolean setOfflineInZooKeeper(final RegionState state) {
     if (!state.isClosed() && !state.isOffline()) {
-      new RuntimeException("Unexpected state trying to OFFLINE; " + state);
       this.master.abort("Unexpected state trying to OFFLINE; " + state,
         new IllegalStateException());
       return false;
@@ -1969,7 +1991,17 @@
       final HRegionInfo a, final HRegionInfo b) {
     regionOffline(parent);
     regionOnline(a, hsi);
     regionOnline(b, hsi);
+    if (this.daughterPlacement == DaughterPlacement.ONE_ON_LEAST_LOADED) {
+      // Read the field once; getAssignments() may replace it concurrently.
+      final HServerInfo target = this.leastLoadedServer;
+      // Compare by value: the tracked server can be a clone of hsi, so a
+      // reference comparison would schedule a pointless move onto itself.
+      if (target != null && !target.equals(hsi)) {
+        LOG.info("placing " + b + " on " + target + " from " + hsi);
+        balance(new RegionPlan(b, hsi, target));
+      }
+    }
 
     // There's a possibility that the region was splitting while a user asked
     // the master to disable, we need to make sure we close those regions in
@@ -1982,6 +2014,13 @@
     }
   }
 
+  /**
+   * Least loaded server seen by the last {@link #getAssignments()} call, or
+   * null before the first call. Instance state (not static) and volatile
+   * because it is written there and read when a split is reported.
+   */
+  private volatile HServerInfo leastLoadedServer = null;
+
   /**
    * @return A clone of current assignments. Note, this is assignments only.
    * If a new server has come in and it has no regions, it will not be included
@@ -1993,6 +2032,9 @@
     // wants to iterate this exported list.  We need to synchronize on regions
     // since all access to this.servers is under a lock on this.regions.
     Map<HServerInfo, List<HRegionInfo>> result = null;
+    // Track the least loaded server locally; publish it once at the end.
+    HServerInfo leastLoaded = null;
+    int minSize = Integer.MAX_VALUE;
     synchronized (this.regions) {
       result = new HashMap<HServerInfo, List<HRegionInfo>>(this.servers.size());
       for (Map.Entry<HServerInfo, List<HRegionInfo>> e: this.servers.entrySet()) {
@@ -2000,10 +2042,27 @@
         HServerInfo clone = new HServerInfo(e.getKey());
         // Set into server load the number of regions this server is carrying
         // The load balancer calculation needs it at least and its handy.
-        clone.getLoad().setNumberOfRegions(e.getValue().size());
+        int sz = e.getValue().size();
+        clone.getLoad().setNumberOfRegions(sz);
         result.put(clone, shallowCopy);
+        if (sz < minSize) {
+          minSize = sz;
+          leastLoaded = clone;
+        }
       }
     }
+    // We need to cover servers w/o assignments. They carry no regions, so any
+    // of them is the least loaded server.
+    for (Map.Entry<String, HServerInfo> e:
+        this.serverManager.getOnlineServers().entrySet()) {
+      HServerInfo hsi = e.getValue();
+      if (!result.containsKey(hsi)) {
+        result.put(hsi, new ArrayList<HRegionInfo>());
+        leastLoaded = hsi;
+      }
+    }
+    // Publish once so readers never observe an intermediate candidate.
+    this.leastLoadedServer = leastLoaded;
     return result;
   }
 
@@ -2113,7 +2172,8 @@
     private State state;
     private long stamp;
 
-    public RegionState() {}
+    public RegionState() {
+    }
 
     RegionState(HRegionInfo region, State state) {
       this(region, state, System.currentTimeMillis());