From 504768f2b57b2fec705b62ac842714157fd71702 Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Wed, 1 Oct 2014 00:12:23 -0700 Subject: [PATCH] HBASE-12139 StochasticLoadBalancer doesn't work on large lightly loaded clusters Summary: Currently the move cost overshadows the skew cost on a large cluster. This can render the split policy worse than useless and it can trap meta on the most loaded server in the cluster. Test Plan: Unit tests everywhere likeaboss Reviewers: stack, manukranthk Differential Revision: https://reviews.facebook.net/D24285 Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java --- .arcconfig | 2 +- .../master/balancer/StochasticLoadBalancer.java | 24 ++++++++++++--- .../hbase/master/balancer/BalancerTestBase.java | 18 ++++++----- .../balancer/TestStochasticLoadBalancer.java | 35 +++++++++++++++++----- 4 files changed, 59 insertions(+), 20 deletions(-) diff --git a/.arcconfig b/.arcconfig index 9cdb37d..8eef934 100644 --- a/.arcconfig +++ b/.arcconfig @@ -1,5 +1,5 @@ { - "project_id" : "hbase", + "project_id" : "HBaseOnGithub", "conduit_uri" : "https://reviews.facebook.net/", "copyright_holder" : "Apache Software Foundation", "max_line_length" : 100 diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 69a7bba..debbd9d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -597,14 +597,28 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // Compute max as if all region servers had 0 and one had the sum of all costs. This must be // a zero sum cost for this to make sense. - // TODO: Should we make this sum of square errors? double max = ((count - 1) * mean) + (total - mean); - for (double n : stats) { + + // It's possible that there aren't enough regions to go around + double min; + if (count > total) { + min = ((count - total) * mean) + ((1 - mean) * total); + } else { + // Some will have 1 more than everything else. + int numHigh = (int) (total - (Math.floor(mean) * count)); + int numLow = (int) (count - numHigh); + + min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean))); + + } + min = Math.max(0, min); + for (int i=0; i plans, Map> servers) { List result = new ArrayList(list.size()); - if (plans == null) return result; + Map map = new HashMap(list.size()); for (ServerAndLoad sl : list) { map.put(sl.getServerName(), sl); } - for (RegionPlan plan : plans) { - ServerName source = plan.getSource(); + if (plans != null) { + for (RegionPlan plan : plans) { + ServerName source = plan.getSource(); - updateLoad(map, source, -1); - ServerName destination = plan.getDestination(); - updateLoad(map, destination, +1); + updateLoad(map, source, -1); + ServerName destination = plan.getDestination(); + updateLoad(map, destination, +1); - servers.get(source).remove(plan.getRegionInfo()); - servers.get(destination).add(plan.getRegionInfo()); + servers.get(source).remove(plan.getRegionInfo()); + servers.get(destination).add(plan.getRegionInfo()); + } } result.clear(); result.addAll(map.values()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index 162a257..cbd1fff 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -57,10 +57,26 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public static void beforeAllTests() throws Exception { Configuration conf = HBaseConfiguration.create(); conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 0.75f); + conf.setFloat("hbase.regions.slop", 0.0f); loadBalancer = new StochasticLoadBalancer(); loadBalancer.setConf(conf); } + int[] largeCluster = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 56 }; + // int[testnum][servernumber] -> numregions int[][] clusterStateMocks = new int[][]{ // 1 node @@ -78,7 +94,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { new int[]{0, 1}, new int[]{10, 1}, new int[]{514, 1432}, - new int[]{47, 53}, + new int[]{48, 53}, // 3 node new int[]{0, 1, 2}, new int[]{1, 2, 3}, @@ -115,7 +131,9 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { new int[]{10, 7, 12, 8, 11, 10, 9, 14}, new int[]{13, 14, 6, 10, 10, 10, 8, 10}, new int[]{130, 14, 60, 10, 100, 10, 80, 10}, - new int[]{130, 140, 60, 100, 100, 100, 80, 100} + new int[]{130, 140, 60, 100, 100, 100, 80, 100}, + largeCluster, + }; @Test @@ -191,18 +209,21 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { assertTrue(cost >= 0); assertTrue(cost <= 1.01); } - assertEquals(1, + + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 0, 0, 0, 1})), 0.01); - assertEquals(.75, + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 0, 0, 1, 1})), 0.01); - assertEquals(.5, + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 0, 1, 1, 1})), 0.01); - assertEquals(.25, + assertEquals(0, costFunction.cost(mockCluster(new int[]{0, 1, 1, 1, 1})), 0.01); assertEquals(0, costFunction.cost(mockCluster(new int[]{1, 1, 1, 1, 1})), 0.01); assertEquals(0, costFunction.cost(mockCluster(new int[]{10, 10, 10, 10, 10})), 0.01); + assertEquals(1, + costFunction.cost(mockCluster(new int[]{10000, 0, 0, 0, 0})), 0.01); } @Test @@ -234,7 +255,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { for (int i =0; i < 100; i++) { statTwo[i] = 0; } - statTwo[100] = 100; + statTwo[100] = 101; assertEquals(1, costFunction.costFromArray(statTwo), 0.01); double[] statThree = new double[200]; -- 2.0.0