From ff92c4538465c47a512d4c2cc5d4f461f468af99 Mon Sep 17 00:00:00 2001 From: Kahlil Oppenheimer Date: Mon, 27 Feb 2017 17:26:08 -0500 Subject: [PATCH] Made balancer respect max moves and short-circuit when hit --- .../master/balancer/StochasticLoadBalancer.java | 43 +++++++++++----------- .../balancer/TestStochasticLoadBalancer.java | 37 +++++++++++++++---- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 8825637..cd13f72 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -109,6 +109,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { private static final String TABLE_FUNCTION_SEP = "_"; protected static final String MIN_COST_NEED_BALANCE_KEY = "hbase.master.balancer.stochastic.minCostNeedBalance"; + protected static final String MAX_MOVE_PERCENT_KEY = + "hbase.master.balancer.stochastic.maxMovePercent"; private static final Random RANDOM = new Random(System.currentTimeMillis()); private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class); @@ -122,6 +124,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { private int numRegionLoadsToRemember = 15; private float minCostNeedBalance = 0.05f; + private float maxMovePercent = 0.25f; + private CandidateGenerator[] candidateGenerators; private CostFromRegionLoadFunction[] regionLoadFunctions; private CostFunction[] costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC @@ -166,6 +170,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember); isByTable = 
conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable); + maxMovePercent = conf.getFloat(MAX_MOVE_PERCENT_KEY, maxMovePercent); + minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance); if (localityCandidateGenerator == null) { @@ -361,12 +367,26 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { double initCost = currentCost; double newCost = currentCost; + long maxMovedRegions = Math.round(maxMovePercent * cluster.numRegions); long computedMaxSteps = Math.min(this.maxSteps, ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers)); // Perform a stochastic walk to see if we can get a good fit. long step; for (step = 0; step < computedMaxSteps; step++) { + + if (cluster.numMovedRegions >= maxMovedRegions) { + // If no moves are allowed, or we are at max # moves and the cost has already improved past the minCostNeedBalance threshold, exit early + if (maxMovedRegions == 0 || currentCost < initCost * (1 - minCostNeedBalance)) { + break; + } else { // Otherwise, restart cluster state to find better set of moves that might meet threshold + cluster = new Cluster(clusterState, loads, finder, rackManager); + initCosts(cluster); + currentCost = computeCost(cluster, Double.MAX_VALUE); + continue; + } + } + int generatorIdx = RANDOM.nextInt(candidateGenerators.length); CandidateGenerator p = candidateGenerators[generatorIdx]; Cluster.Action action = p.generate(cluster); @@ -1053,39 +1073,18 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { */ static class MoveCostFunction extends CostFunction { private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost"; - private static final String MAX_MOVES_PERCENT_KEY = - "hbase.master.balancer.stochastic.maxMovePercent"; private static final float DEFAULT_MOVE_COST = 7; - private static final int DEFAULT_MAX_MOVES = 600; - private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f; - - private final float maxMovesPercent; MoveCostFunction(Configuration 
conf) { super(conf); - // Move cost multiplier should be the same cost or higher than the rest of the costs to ensure // that large benefits are need to overcome the cost of a move. this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST)); - // What percent of the number of regions a single run of the balancer can move. - maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT); } @Override double cost() { - // Try and size the max number of Moves, but always be prepared to move some. - int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent), - DEFAULT_MAX_MOVES); - - double moveCost = cluster.numMovedRegions; - - // Don't let this single balance move more than the max moves. - // This allows better scaling to accurately represent the actual cost of a move. - if (moveCost > maxMoves) { - return 1000000; // return a number much greater than any of the other cost - } - - return scale(0, Math.min(cluster.numRegions, maxMoves), moveCost); + return scale(0, cluster.numRegions, cluster.numMovedRegions); } } diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index 614d2fb..68305e7 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster; import org.apache.hadoop.hbase.testclassification.FlakeyTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Before; import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -60,6 +61,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public static 
final String REGION_KEY = "testRegion"; private static final Log LOG = LogFactory.getLog(TestStochasticLoadBalancer.class); + @Before + public void initializeConfig() { + conf.setFloat(StochasticLoadBalancer.MAX_MOVE_PERCENT_KEY, 1.0f); + conf.setFloat(StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY, 0.0f); + loadBalancer.setConf(conf); + } + @Test public void testKeepRegionLoad() throws Exception { @@ -119,7 +127,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { */ @Test public void testBalanceCluster() throws Exception { - for (int[] mockCluster : clusterStateMocks) { Map> servers = mockClusterServers(mockCluster); List list = convertToList(servers); @@ -181,13 +188,13 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { cluster.setNumRegions(10000); cluster.setNumMovedRegions(250); cost = costFunction.cost(); - assertEquals(0.1f, cost, 0.001); + assertEquals(0.025, cost, 0.001); cluster.setNumMovedRegions(1250); cost = costFunction.cost(); - assertEquals(0.5f, cost, 0.001); + assertEquals(0.125, cost, 0.001); cluster.setNumMovedRegions(2500); cost = costFunction.cost(); - assertEquals(1.0f, cost, 0.01); + assertEquals(0.25, cost, 0.01); } } @@ -256,6 +263,25 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { } @Test + public void testMaxMovePercentRespected() { + int numNodes = 2; + int numRegions = 100; + int replication = 1; + int numRegionsPerServer = 1; // one server has 99 regions, other has 1 + int numTables = 1; + + Map> serverMap = createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables); + + for (int i = 0; i < 50; i++) { + conf.setDouble(StochasticLoadBalancer.MAX_MOVE_PERCENT_KEY, i / 100.0); // Should only move i regions per balancer run + + loadBalancer.setConf(conf); + List regionPlans = loadBalancer.balanceCluster(serverMap); + assertEquals(i, regionPlans == null ? 
0 : regionPlans.size()); + } + } + + @Test public void testCostFromArray() { Configuration conf = HBaseConfiguration.create(); StochasticLoadBalancer.CostFromRegionLoadFunction @@ -488,7 +514,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { false); } - @Test (timeout = 800000) public void testMidCluster3() { int numNodes = 100; @@ -524,7 +549,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public void testRegionReplicationOnMidClusterSameHosts() { conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L); conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec - conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); loadBalancer.setConf(conf); int numHosts = 100; int numRegions = 100 * 100; @@ -569,7 +593,6 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { @Test (timeout = 800000) public void testRegionReplicationOnMidClusterWithRacks() { conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 10000000L); - conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec loadBalancer.setConf(conf); int numNodes = 30; -- 2.10.1 (Apple Git-78)