From d31c7174fdfd7495daa7c1dae15da155035b7c60 Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Wed, 1 Oct 2014 00:12:23 -0700 Subject: [PATCH] HBASE-12139 StochasticLoadBalancer doesn't work on large lightly loaded clusters Summary: Currently the move cost overshadows the skew cost on a large cluster. This can render the split policy worse than useless and it can trap meta on the most loaded server in the cluster. Test Plan: Unit tests everywhere likeaboss Reviewers: stack, manukranthk Differential Revision: https://reviews.facebook.net/D24285 --- .arcconfig | 2 +- .../master/balancer/StochasticLoadBalancer.java | 21 ++++++++++- .../hbase/master/balancer/BalancerTestBase.java | 18 +++++---- .../balancer/TestStochasticLoadBalancer.java | 43 ++++++++++++++-------- 4 files changed, 58 insertions(+), 26 deletions(-) diff --git a/.arcconfig b/.arcconfig index 9cdb37d..8eef934 100644 --- a/.arcconfig +++ b/.arcconfig @@ -1,5 +1,5 @@ { - "project_id" : "hbase", + "project_id" : "HBaseOnGithub", "conduit_uri" : "https://reviews.facebook.net/", "copyright_holder" : "Apache Software Foundation", "max_line_length" : 100 diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 6f564e0..7e57670 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -808,9 +808,24 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // Compute max as if all region servers had 0 and one had the sum of all costs. This must be // a zero sum cost for this to make sense. - // TODO: Should we make this sum of square errors? double max = ((count - 1) * mean) + (total - mean); + + // It's possible that there aren't enough regions to go around + double min; + if (count > total) { + min = ((count - total) * mean) + ((1 - mean) * total); + } else { + // Some will have 1 more than everything else. + int numHigh = (int) (total - (Math.floor(mean) * count)); + int numLow = (int) (count - numHigh); + + min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean))); + + } + min = Math.max(0, min); for (int i=0; i 1 && cluster.isActiveMaster(i)) { // Not count the active master load continue; @@ -820,7 +835,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { totalCost += diff; } - double scaled = scale(0, max, totalCost); + double scaled = scale(min, max, totalCost); return scaled; } @@ -844,6 +859,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { if (max <= min || value <= min) { return 0; } + if ((max - min) == 0) return 0; return Math.max(0d, Math.min(1d, (value - min) / (max - min))); } @@ -919,6 +935,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { for (int i =0; i < cluster.numServers; i++) { stats[i] = cluster.regionsPerServer[i].length; } + return costFromArray(stats); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java index 55c91f4..37a4a75 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java @@ -180,20 +180,22 @@ public class BalancerTestBase { List plans, Map> servers) { List result = new ArrayList(list.size()); - if (plans == null) return result; + Map map = new HashMap(list.size()); for (ServerAndLoad sl : list) { map.put(sl.getServerName(), sl); } - for (RegionPlan plan : plans) { - ServerName source = plan.getSource(); + if (plans != null) { + for (RegionPlan plan : plans) { + ServerName source = plan.getSource(); - updateLoad(map, source, -1); - ServerName destination = plan.getDestination(); - updateLoad(map, destination, +1); + updateLoad(map, source, -1); + ServerName destination = plan.getDestination(); + updateLoad(map, destination, +1); - servers.get(source).remove(plan.getRegionInfo()); - servers.get(destination).add(plan.getRegionInfo()); + servers.get(source).remove(plan.getRegionInfo()); + servers.get(destination).add(plan.getRegionInfo()); + } } result.clear(); result.addAll(map.values()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index 4ff5afe..adb8a4d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -66,10 +66,26 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public static void beforeAllTests() throws Exception { conf = HBaseConfiguration.create(); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); + conf.setFloat("hbase.regions.slop", 0.0f); loadBalancer = new StochasticLoadBalancer(); loadBalancer.setConf(conf); } + int[] largeCluster = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 56 }; + // int[testnum][servernumber] -> numregions int[][] clusterStateMocks = new int[][]{ // 1 node @@ -87,7 +103,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { new int[]{0, 1}, new int[]{10, 1}, new int[]{514, 1432}, - new int[]{47, 53}, + new int[]{48, 53}, // 3 node new int[]{0, 1, 2}, new int[]{1, 2, 3}, @@ -124,7 +140,9 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { new int[]{10, 7, 12, 8, 11, 10, 9, 14}, new int[]{13, 14, 6, 10, 10, 10, 8, 10}, new int[]{130, 14, 60, 10, 100, 10, 80, 10}, - new int[]{130, 140, 60, 100, 100, 100, 80, 100} + new int[]{130, 140, 60, 100, 100, 100, 80, 100}, + largeCluster, + }; @Test @@ -201,24 +219,19 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { assertTrue(cost >= 0); assertTrue(cost <= 1.01); } + costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1})); - assertEquals(1, - costFunction.cost(), 0.01); + assertEquals(0,costFunction.cost(), 0.01); costFunction.init(mockCluster(new int[]{0, 0, 0, 1, 1})); - assertEquals(.75, - costFunction.cost(), 0.01); + assertEquals(0, costFunction.cost(), 0.01); costFunction.init(mockCluster(new int[]{0, 0, 1, 1, 1})); - assertEquals(.5, - costFunction.cost(), 0.01); + assertEquals(0, costFunction.cost(), 0.01); costFunction.init(mockCluster(new int[]{0, 1, 1, 1, 1})); - assertEquals(.25, - costFunction.cost(), 0.01); + assertEquals(0, costFunction.cost(), 0.01); costFunction.init(mockCluster(new int[]{1, 1, 1, 1, 1})); - assertEquals(0, - costFunction.cost(), 0.01); - costFunction.init(mockCluster(new int[]{10, 10, 10, 10, 10})); - assertEquals(0, - costFunction.cost(), 0.01); + assertEquals(0, costFunction.cost(), 0.01); + costFunction.init(mockCluster(new int[]{10000, 0, 0, 0, 0})); + assertEquals(1, costFunction.cost(), 0.01); } @Test -- 2.0.0