diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index b6fea17..9b7e132 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1468,7 +1468,7 @@ MasterServices, Server { long cutoffTime = System.currentTimeMillis() + maximumBalanceTime; int rpCount = 0; // number of RegionPlans balanced so far long totalRegPlanExecTime = 0; - balancerRan = plans != null; + balancerRan = plans.size() != 0; if (plans != null && !plans.isEmpty()) { for (RegionPlan plan: plans) { LOG.info("balance " + plan); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 8fdd37c..06be435 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -829,8 +829,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } return false; } - // TODO: check for co-located region replicas as well - + if(areSomeRegionReplicasColocated(cs)) return true; // Check if we even need to do any load balancing // HBASE-3681 check sloppiness first float average = cs.getLoadAverage(); // for logging @@ -852,6 +851,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } /** + * Subclasses should implement this to return true if the cluster has nodes that hosts + * multiple replicas for the same region, or, if there are multiple racks and the same + * rack hosts replicas of the same region + * @param cs + * @return + */ + protected boolean areSomeRegionReplicasColocated(ClusterLoadState cs) { + return false; + } + + /** * Generates a bulk assignment plan to be used on cluster startup using a * simple round-robin assignment. *

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 3da6266..7403b6d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -123,6 +123,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // when new services are offered private LocalityBasedCandidateGenerator localityCandidateGenerator; private LocalityCostFunction localityCost; + private RegionReplicaHostCostFunction regionReplicaHostCostFunction; + private RegionReplicaRackCostFunction regionReplicaRackCostFunction; @Override public void setConf(Configuration conf) { @@ -152,13 +154,16 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { new StoreFileCostFunction(conf) }; + regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf); + regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf); + costFunctions = new CostFunction[]{ new RegionCountSkewCostFunction(conf), new MoveCostFunction(conf), localityCost, new TableSkewCostFunction(conf), - new RegionReplicaHostCostFunction(conf), - new RegionReplicaRackCostFunction(conf), + regionReplicaHostCostFunction, + regionReplicaRackCostFunction, regionLoadFunctions[0], regionLoadFunctions[1], regionLoadFunctions[2], @@ -189,6 +194,16 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { } + @Override + protected boolean areSomeRegionReplicasColocated(ClusterLoadState cs) { + Cluster c = new Cluster(cs.getClusterState(), loads, regionFinder, rackManager); + regionReplicaHostCostFunction.init(c); + if (regionReplicaHostCostFunction.cost() > 0) return true; + regionReplicaRackCostFunction.init(c); + if (regionReplicaRackCostFunction.cost() > 0) return true; + return false; + } + /** * Given the cluster state this will try and approach an optimal balance. This * should always approach the optimal state given enough steps. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java index 0294b12..0e9a321 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java @@ -118,10 +118,14 @@ public class TestRegionRebalancing { UTIL.getHBaseCluster().getMaster().balance(); assertRegionsAreBalanced(); + // On a balanced cluster, calling balance() should return false + assert(UTIL.getHBaseCluster().getMaster().balance() == false); + + // However if we add a server, then the balance() call should return true // add a region server - total of 3 LOG.info("Started third server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); - UTIL.getHBaseCluster().getMaster().balance(); + assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); // kill a region server - total of 2 @@ -135,14 +139,14 @@ public class TestRegionRebalancing { UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); LOG.info("Added fourth server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); - UTIL.getHBaseCluster().getMaster().balance(); + assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); for (int i = 0; i < 6; i++){ LOG.info("Adding " + (i + 5) + "th region server"); UTIL.getHBaseCluster().startRegionServer(); } - UTIL.getHBaseCluster().getMaster().balance(); + assert(UTIL.getHBaseCluster().getMaster().balance() == true); assertRegionsAreBalanced(); table.close(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index f7c51fd..3b3f7d5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.when; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -380,6 +381,34 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { assertTrue(costWith2ReplicasOnTwoServers < costWith3ReplicasSameServer); } + @Test + public void testNeedsBalance() { + // check for the case where there are two hosts and with one rack, and where + // both the replicas are hosted on the same server + List regions = randomRegions(1); + ServerName s1 = ServerName.valueOf("host1", 1000, 11111); + ServerName s2 = ServerName.valueOf("host11", 1000, 11111); + Map> map = new HashMap>(); + map.put(s1, regions); + regions.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1)); + // until the last step s1 holds two replicas of a region + regions = randomRegions(1); + map.put(s2, regions); + loadBalancer.setRackManager(new ForTestRackManagerOne()); + assertTrue(loadBalancer.needsBalance(new ClusterLoadState(map))); + // check for the case where there are two hosts on the same rack and there are two racks + // and both the replicas are on the same rack + map.clear(); + regions = randomRegions(1); + List regionsOnS2 = new ArrayList(1); + regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1)); + map.put(s1, regions); + map.put(s2, regionsOnS2); + // add another server so that the cluster has some host on another rack + map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1)); + assertTrue(loadBalancer.needsBalance(new ClusterLoadState(map))); + } + @Test (timeout = 60000) public void testSmallCluster() { int numNodes = 10; @@ -544,6 +573,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { } } + private static class ForTestRackManagerOne extends RackManager { + @Override + public String getRack(ServerName server) { + return server.getHostname().endsWith("1") ? "rack1" : "rack2"; + } + } + @Test (timeout = 180000) public void testRegionReplicationOnMidClusterWithRacks() { conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 4000000L);