diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 0e6f13d7b2..7785a9b9dd 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1391,13 +1391,15 @@ public class HMaster extends HRegionServer implements MasterServices { Map>> assignmentsByTable = this.assignmentManager.getRegionStates().getAssignmentsByTable(); + augmentAssignments(assignmentsByTable); List plans = new ArrayList<>(); - //Give the balancer the current cluster state. + // Give the balancer the current cluster state. + // It should handle dead servers, which can be present + // assignmentsByTable this.balancer.setClusterStatus(getClusterStatus()); - this.balancer.setClusterLoad( - this.assignmentManager.getRegionStates().getAssignmentsByTable()); + this.balancer.setClusterLoad(assignmentsByTable); for (Entry>> e : assignmentsByTable.entrySet()) { List partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue()); @@ -1442,11 +1444,50 @@ public class HMaster extends HRegionServer implements MasterServices { } } } + //Remove dead servers from AM.regionStates + refreshAssignmentManager(); // If LoadBalancer did not generate any plans, it means the cluster is already balanced. // Return true indicating a success. return true; } + private void refreshAssignmentManager() throws InterruptedIOException { + RegionStates regionStates = assignmentManager.getRegionStates(); + ClusterStatus clusterStatus = getClusterStatus(); + regionStates.update(clusterStatus); + } + + /** + * Adds newly added servers (with zero assigned regions) + * @param assignmentsByTable + * @throws InterruptedIOException + */ + private void augmentAssignments( + Map>> assignmentsByTable) + throws InterruptedIOException { + + ClusterStatus status = null; + try { + status = getClusterStatus(); + } catch (InterruptedIOException e) { + Thread.currentThread().interrupt(); + throw e; + } + Collection servers = status.getServers(); + // For every table + for (TableName table : assignmentsByTable.keySet()) { + Map> serverMap = assignmentsByTable.get(table); + Set keySet = serverMap.keySet(); + for (ServerName sn : servers) { + if (keySet.contains(sn)) { + continue; + } + List list = new ArrayList(); + serverMap.put(sn, list); + } + } + } + @Override @VisibleForTesting public RegionNormalizer getRegionNormalizer() { diff --git hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index df55c94b19..226e2d0f28 100644 --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -24,12 +24,13 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; @@ -37,6 +38,7 @@ import java.util.concurrent.ConcurrentSkipListMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; @@ -185,7 +187,7 @@ public class RegionStates { this.openSeqNum = seqId; } - + public ServerName setRegionLocation(final ServerName serverName) { ServerName lastRegionLocation = this.regionLocation; if (LOG.isTraceEnabled() && serverName == null) { @@ -277,7 +279,7 @@ public class RegionStates { public String toString() { return toDescriptiveString(); } - + public String toShortString() { // rit= is the current Region-In-Transition State -- see State enum. return String.format("rit=%s, location=%s", getState(), getRegionLocation()); @@ -613,6 +615,20 @@ public class RegionStates { } } + + public void update(ClusterStatus status) { + Collection servers = status.getServers(); + List toDelete = new ArrayList(); + for (ServerName name: serverMap.keySet()) { + if(!servers.contains(name)) { + toDelete.add(name); + } + } + for(ServerName name: toDelete) { + serverMap.remove(name); + } + } + // ============================================================================================ // TODO: // ============================================================================================ @@ -731,6 +747,10 @@ public class RegionStates { public Map>> getAssignmentsByTable() { final Map>> result = new HashMap<>(); + Enumeration en = serverMap.keys(); + while (en.hasMoreElements()) { + LOG.debug("server: "+ en.nextElement()); + } for (RegionStateNode node: regionsMap.values()) { Map> tableResult = result.get(node.getTable()); if (tableResult == null) { @@ -931,12 +951,27 @@ public class RegionStates { int numServers = 0; int totalLoad = 0; for (ServerStateNode node: serverMap.values()) { + if (isMaster(node)) { + // Skip Master when calculate load + continue; + } totalLoad += node.getRegionCount(); numServers++; } + return numServers == 0 ? 0.0: (double)totalLoad / (double)numServers; } + private boolean isMaster(ServerStateNode node) { + Set nodes = node.getRegions(); + for (RegionStateNode rn : nodes) { + if (rn.getRegionInfo().isMetaRegion()) { + return true; + } + } + return false; + } + public ServerStateNode addRegionToServer(final ServerName serverName, final RegionStateNode regionNode) { ServerStateNode serverNode = getOrCreateServer(serverName); diff --git hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java index cff1a8d744..f131d664bf 100644 --- hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java +++ hbase-server/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.util.JVMClusterUtil; import org.apache.hadoop.hbase.util.Threads; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -52,8 +51,7 @@ import org.junit.runners.Parameterized.Parameters; /** * Test whether region re-balancing works. (HBASE-71) */ -@Ignore // This is broken since new RegionServers does proper average of regions -// and because Master is treated as a regionserver though it hosts two regions only. + @Category({FlakeyTests.class, LargeTests.class}) @RunWith(value = Parameterized.class) public class TestRegionRebalancing { @@ -119,6 +117,7 @@ public class TestRegionRebalancing { // add a region server - total of 2 LOG.info("Started second server=" + UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName()); + UTIL.getHBaseCluster().getMaster().balance(); assertRegionsAreBalanced(); @@ -176,7 +175,7 @@ public class TestRegionRebalancing { // TODO: Fix this test. Old balancer used to run with 'slop'. New // balancer does not. boolean success = false; - float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f); + float slop = UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f); if (slop <= 0) slop = 1; for (int i = 0; i < 5; i++) { @@ -185,6 +184,8 @@ public class TestRegionRebalancing { waitForAllRegionsAssigned(); long regionCount = UTIL.getMiniHBaseCluster().countServedRegions(); + // Less two system regions meta and ns + regionCount -= 2; List servers = getOnlineRegionServers(); double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad(); int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));