From 4e1f5d8eba6b418fff4f14889cd0cb78cfabef68 Mon Sep 17 00:00:00 2001 From: haxiaolin Date: Mon, 27 May 2019 09:35:17 +0800 Subject: [PATCH] HBASE-22403 Balance in RSGroup should consider throttling and a failure affects the whole --- .../hadoop/hbase/rsgroup/RSGroupAdminServer.java | 13 ++--- .../org/apache/hadoop/hbase/master/HMaster.java | 68 ++++++++++++---------- .../apache/hadoop/hbase/master/MasterServices.java | 7 +++ .../hbase/master/MockNoopMasterServices.java | 5 ++ .../hadoop/hbase/master/TestCatalogJanitor.java | 5 ++ 5 files changed, 58 insertions(+), 40 deletions(-) diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java index c6c90d9ebe..f0747223c6 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java @@ -318,7 +318,6 @@ public class RSGroupAdminServer implements RSGroupAdmin { @Override public boolean balanceRSGroup(String groupName) throws IOException { ServerManager serverManager = master.getServerManager(); - AssignmentManager assignmentManager = master.getAssignmentManager(); LoadBalancer balancer = master.getLoadBalancer(); boolean balancerRan; @@ -354,16 +353,12 @@ public class RSGroupAdminServer implements RSGroupAdmin { plans.addAll(partialPlans); } } - long startTime = System.currentTimeMillis(); + balancerRan = plans != null; if (plans != null && !plans.isEmpty()) { - LOG.info("Group balance "+groupName+" starting with plan count: "+plans.size()); - for (RegionPlan plan: plans) { - LOG.info("balance " + plan); - assignmentManager.balance(plan); - } - LOG.info("Group balance "+groupName+" completed after "+ - (System.currentTimeMillis()-startTime)+" seconds"); + LOG.info("Group balance " + groupName + " starting with plan count: " + plans.size()); + master.executeRegionPlansWithThrottling(plans); + LOG.info("Group balance " + groupName + " completed"); } } return balancerRan; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index f9d92aba9c..15a97afa85 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1495,7 +1495,6 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return false; } - int maxRegionsInTransition = getMaxRegionsInTransition(); synchronized (this.balancer) { // If balance not true, don't run balancer. if (!this.loadBalancerTracker.isBalancerOn()) return false; @@ -1542,38 +1541,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server { if (partialPlans != null) plans.addAll(partialPlans); } - long balanceStartTime = System.currentTimeMillis(); - long cutoffTime = balanceStartTime + this.maxBlancingTime; - int rpCount = 0; // number of RegionPlans balanced so far - if (plans != null && !plans.isEmpty()) { - int balanceInterval = this.maxBlancingTime / plans.size(); - LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is " - + balanceInterval + " ms, and the max number regions in transition is " - + maxRegionsInTransition); - - for (RegionPlan plan: plans) { - LOG.info("balance " + plan); - //TODO: bulk assign - this.assignmentManager.balance(plan); - rpCount++; - - balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition, - cutoffTime); - - // if performing next balance exceeds cutoff time, exit the loop - if (rpCount < plans.size() && System.currentTimeMillis() > cutoffTime) { - // TODO: After balance, there should not be a cutoff time (keeping it as a security net - // for now) - LOG.debug("No more balancing till next balance run; maxBalanceTime=" - + this.maxBlancingTime); - break; - } - } - } - + List sucRPs = executeRegionPlansWithThrottling(plans); if (this.cpHost != null) { try { - this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans); + this.cpHost.postBalance(sucRPs); } catch (IOException ioe) { // balancing already succeeded so don't change the result LOG.error("Error invoking master coprocessor postBalance()", ioe); @@ -1585,6 +1556,41 @@ public class HMaster extends HRegionServer implements MasterServices, Server { return true; } + public List executeRegionPlansWithThrottling(List plans){ + List sucRPs = new ArrayList<>(); + int maxRegionsInTransition = getMaxRegionsInTransition(); + long balanceStartTime = System.currentTimeMillis(); + long cutoffTime = balanceStartTime + this.maxBlancingTime; + int rpCount = 0; // number of RegionPlans balanced so far + if (plans != null && !plans.isEmpty()) { + int balanceInterval = this.maxBlancingTime / plans.size(); + LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is " + + balanceInterval + " ms, and the max number regions in transition is " + + maxRegionsInTransition); + + for (RegionPlan plan: plans) { + LOG.info("balance " + plan); + //TODO: bulk assign + this.assignmentManager.balance(plan); + sucRPs.add(plan); + rpCount++; + + balanceThrottling(balanceStartTime + rpCount * balanceInterval, + maxRegionsInTransition, cutoffTime); + + // if performing next balance exceeds cutoff time, exit the loop + if (rpCount < plans.size() && System.currentTimeMillis() > cutoffTime) { + // TODO: After balance, there should not be a cutoff time (keeping it as a security net + // for now) + LOG.debug("No more balancing till next balance run; maxBalanceTime=" + + this.maxBlancingTime); + break; + } + } + } + return sucRPs; + } + /** * Perform normalization of cluster (invoked by {@link RegionNormalizerChore}). * diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index be6fb12d1c..a256f4c421 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -424,4 +424,11 @@ public interface MasterServices extends Server { * @return load balancer */ public LoadBalancer getLoadBalancer(); + + /** + * Execute region plans with throttling + * @param plans to execute + * @return succeeded plans + */ + List executeRegionPlansWithThrottling(List plans); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index db26d37a2f..dfa5fba1d0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -312,4 +312,9 @@ public class MockNoopMasterServices implements MasterServices, Server { public LoadBalancer getLoadBalancer() { return null; } + + @Override + public List executeRegionPlansWithThrottling(List plans) { + return null; + } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java index 94166203ca..92a9ba3e63 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java @@ -500,6 +500,11 @@ public class TestCatalogJanitor { return null; } + @Override + public List executeRegionPlansWithThrottling(List plans) { + return null; + } + @Override public long disableTable( TableName tableName, -- 2.14.1