Index: src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java (revision 1084063) +++ src/test/java/org/apache/hadoop/hbase/master/TestLoadBalancer.java (working copy) @@ -179,17 +179,19 @@ */ @Test public void testBalanceCluster() throws Exception { - - for(int [] mockCluster : clusterStateMocks) { - Map> servers = mockClusterServers(mockCluster); - LOG.info("Mock Cluster : " + printMock(servers) + " " + printStats(servers)); - List plans = loadBalancer.balanceCluster(servers); - List balancedCluster = reconcile(servers, plans); - LOG.info("Mock Balance : " + printMock(balancedCluster)); - assertClusterAsBalanced(balancedCluster); - for(Map.Entry> entry : servers.entrySet()) { - returnRegions(entry.getValue()); - returnServer(entry.getKey()); + float[] slops = { 0, 0.1f }; + for (float slop : slops) { + for(int [] mockCluster : clusterStateMocks) { + Map> servers = mockClusterServers(mockCluster); + LOG.info("Mock Cluster : " + printMock(servers) + " " + printStats(servers)); + List plans = loadBalancer.balanceCluster(servers, slop); + List balancedCluster = reconcile(servers, plans); + LOG.info("Mock Balance : " + printMock(balancedCluster)); + assertClusterAsBalanced(balancedCluster, slop); + for(Map.Entry> entry : servers.entrySet()) { + returnRegions(entry.getValue()); + returnServer(entry.getKey()); + } } } @@ -199,7 +201,7 @@ * Invariant is that all servers have between floor(avg) and ceiling(avg) * number of regions. 
*/ - public void assertClusterAsBalanced(List servers) { + public void assertClusterAsBalanced(List servers, float slop) { int numServers = servers.size(); int numRegions = 0; int maxRegions = 0; @@ -218,12 +220,21 @@ // less than 2 between max and min, can't balance return; } + float average = (float)numRegions / numServers; // for logging int min = numRegions / numServers; int max = numRegions % numServers == 0 ? min : min + 1; + min = (int) Math.floor(average * (1 - slop)); + if (min > 0) min -= 1; + max = (int) Math.ceil(average * (1 + slop)); for(HServerInfo server : servers) { - assertTrue(server.getLoad().getNumberOfRegions() <= max); - assertTrue(server.getLoad().getNumberOfRegions() >= min); + int regs = server.getLoad().getNumberOfRegions(); + assertTrue("server " + server.toString() + " has " + regs + + " regions, more than " + max, + server.getLoad().getNumberOfRegions() <= max); + assertTrue("server " + server.toString() + " has " + regs + + " regions, lower than " + min + " (slop=" + slop + ")", + server.getLoad().getNumberOfRegions() >= min); } } @@ -406,6 +417,9 @@ Map> servers, List plans) { if(plans != null) { for(RegionPlan plan : plans) { + if (plan.getDestination() == null) { + continue; + } plan.getSource().getLoad().setNumberOfRegions( plan.getSource().getLoad().getNumberOfRegions() - 1); plan.getDestination().getLoad().setNumberOfRegions( Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1084063) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -145,6 +145,8 @@ // manager of assignment nodes in zookeeper AssignmentManager assignmentManager; + // slop for regions + private float slop; // manager of catalog regions private CatalogTracker catalogTracker; // Cluster status zk tracker and local setter @@ -358,6 +360,11 @@ this.assignmentManager = new 
AssignmentManager(this, serverManager, this.catalogTracker, this.executorService); + this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2); + if (slop < 0) + slop = 0; + else if (slop > 1) + slop = 1; zooKeeper.registerListenerFirst(assignmentManager); this.regionServerTracker = new RegionServerTracker(zooKeeper, this, @@ -737,7 +744,7 @@ assignments.put(hsi, new ArrayList()); } } - List plans = this.balancer.balanceCluster(assignments); + List plans = this.balancer.balanceCluster(assignments, slop); int rpCount = 0; // number of RegionPlans balanced so far long totalRegPlanExecTime = 0; if (plans != null && !plans.isEmpty()) { Index: src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java (revision 1084063) +++ src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java (working copy) @@ -143,6 +143,11 @@ */ public List balanceCluster( Map> clusterState) { + return balanceCluster(clusterState, 0.2f); + } + + public List balanceCluster( + Map> clusterState, float slop) { long startTime = System.currentTimeMillis(); // Make a map sorted by load and count regions @@ -167,6 +172,9 @@ float average = (float)numRegions / numServers; // for logging int min = numRegions / numServers; int max = numRegions % numServers == 0 ? 
min : min + 1; + min = (int) Math.floor(average * (1 - slop)); + if (min > 0) min -= 1; + max = (int) Math.ceil(average * (1 + slop)); if(serversByLoad.lastKey().getLoad().getNumberOfRegions() <= max && serversByLoad.firstKey().getLoad().getNumberOfRegions() >= min) { // Skipped because no server outside (min,max) range @@ -216,11 +224,11 @@ for(Map.Entry> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad().getNumberOfRegions(); - if(regionCount >= min) { + if(regionCount >= average) { break; } serversUnderloaded++; - int numToTake = min - regionCount; + int numToTake = (int)(average - regionCount); int numTaken = 0; while(numTaken < numToTake && regionidx < regionsToMove.size()) { regionsToMove.get(regionidx).setDestination(server.getKey()); @@ -270,19 +278,19 @@ // Now we have a set of regions that must be all assigned out // Assign each underloaded up to the min, then if leftovers, assign to max - // Walk down least loaded, assigning to each to fill up to min + // Walk down least loaded, assigning to each to fill up to average for(Map.Entry> server : serversByLoad.entrySet()) { int regionCount = server.getKey().getLoad().getNumberOfRegions(); - if (regionCount >= min) break; + if (regionCount >= average) break; BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey()); if(balanceInfo != null) { regionCount += balanceInfo.getNumRegionsAdded(); } - if(regionCount >= min) { + if(regionCount >= average) { continue; } - int numToTake = min - regionCount; + int numToTake = (int)(average - regionCount); int numTaken = 0; while(numTaken < numToTake && regionidx < regionsToMove.size()) { regionsToMove.get(regionidx).setDestination(server.getKey()); Index: src/main/resources/hbase-default.xml =================================================================== --- src/main/resources/hbase-default.xml (revision 1084063) +++ src/main/resources/hbase-default.xml (working copy) @@ -282,6 +282,13 @@ + hbase.regions.slop + 0.2 + Rebalance if 
any regionserver has more than average + (average * slop) regions. + Default is 20% slop. + + + hbase.master.logcleaner.ttl 600000 Maximum time a HLog can stay in the .oldlogdir directory,