From f1847e5d39b51a6ed96b3dafdde28edbfdbef5f6 Mon Sep 17 00:00:00 2001 From: xcang Date: Tue, 23 Oct 2018 15:22:32 -0700 Subject: [PATCH] HBASE-21373 (backport from HBASE-21338) Warn if balancer is an ill-fit for cluster size --- .../master/balancer/StochasticLoadBalancer.java | 28 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 374070ceb6..a9f218a3e3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -108,6 +108,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { "hbase.master.balancer.stochastic.stepsPerRegion"; protected static final String MAX_STEPS_KEY = "hbase.master.balancer.stochastic.maxSteps"; + protected static final String RUN_MAX_STEPS_KEY = + "hbase.master.balancer.stochastic.runMaxSteps"; protected static final String MAX_RUNNING_TIME_KEY = "hbase.master.balancer.stochastic.maxRunningTime"; protected static final String KEEP_REGION_LOADS = @@ -123,6 +125,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // values are defaults private int maxSteps = 1000000; + private boolean runMaxSteps = false; private int stepsPerRegion = 800; private long maxRunningTime = 30 * 1000 * 1; // 30 seconds. private int numRegionLoadsToRemember = 15; @@ -169,6 +172,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion); maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime); + runMaxSteps = conf.getBoolean(RUN_MAX_STEPS_KEY, runMaxSteps); numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember); isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable); @@ -371,14 +375,30 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { for (int i = 0; i < this.curFunctionCosts.length; i++) { curFunctionCosts[i] = tempFunctionCosts[i]; } - LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost=" - + functionCost()); double initCost = currentCost; double newCost = currentCost; - long computedMaxSteps = Math.min(this.maxSteps, - ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers)); + long computedMaxSteps = 0; + if (runMaxSteps) { + computedMaxSteps = Math.max(this.maxSteps, + ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers)); + } else { + long calculatedMaxSteps = + (long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers; + computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps); + if (calculatedMaxSteps > maxSteps) { + LOG.warn(String.format("calculatedMaxSteps:%d for loadbalancer's stochastic walk is larger than " + + "maxSteps:%dß. Hence load balancing may not work well. Setting parameter " + + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true can overcome this issue." + + "(This config change does not require service restart)", calculatedMaxSteps, + maxRunningTime)); + + } + } + LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost=" + + functionCost() + " computedMaxSteps: " + computedMaxSteps); + // Perform a stochastic walk to see if we can get a good fit. long step; -- 2.15.1 (Apple Git-101)