diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java
index e0b8153..5c62f15 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java
@@ -72,19 +72,19 @@ public class FavoredNodeAssignmentHelper {
   public final static short FAVORED_NODES_NUM = 3;
 
   public FavoredNodeAssignmentHelper(final List<ServerName> servers, Configuration conf) {
+    this(servers, new RackManager(conf));
+  }
+
+  public FavoredNodeAssignmentHelper(final List<ServerName> servers,
+      final RackManager rackManager) {
     this.servers = servers;
-    this.rackManager = new RackManager(conf);
+    this.rackManager = rackManager;
     this.rackToRegionServerMap = new HashMap<String, List<ServerName>>();
     this.regionServerToRackMap = new HashMap<ServerName, String>();
     this.uniqueRackList = new ArrayList<String>();
     this.random = new Random();
   }
 
-  // For unit tests
-  void setRackManager(RackManager rackManager) {
-    this.rackManager = rackManager;
-  }
-
   /**
    * Perform full scan of the meta table similar to
    * {@link MetaReader#fullScan(CatalogTracker, Set, boolean)} except that this is
@@ -381,17 +381,19 @@ public class FavoredNodeAssignmentHelper {
 
   void initialize() {
     for (ServerName sn : this.servers) {
-      String rackName = this.rackManager.getRack(sn);
+      // store the servername without startcode
+      ServerName snWithoutStartCode = new ServerName(sn.getHostname(), sn.getPort(), -1);
+      String rackName = this.rackManager.getRack(snWithoutStartCode);
       List<ServerName> serverList = this.rackToRegionServerMap.get(rackName);
       if (serverList == null) {
         serverList = new ArrayList<ServerName>();
         // Add the current rack to the unique rack list
         this.uniqueRackList.add(rackName);
       }
-      if (!serverList.contains(sn)) {
-        serverList.add(sn);
+      if (!serverList.contains(snWithoutStartCode)) {
+        serverList.add(snWithoutStartCode);
         this.rackToRegionServerMap.put(rackName, serverList);
-        this.regionServerToRackMap.put(sn, rackName);
+        this.regionServerToRackMap.put(snWithoutStartCode, rackName);
       }
     }
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java
index 6309779..a338405 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -32,7 +33,10 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.master.LoadBalancer;
+import org.apache.hadoop.hbase.master.RackManager;
 import org.apache.hadoop.hbase.master.RegionPlan;
+import org.apache.hadoop.hbase.master.balancer.FavoredNodes.Position;
+import org.apache.hadoop.hbase.util.Pair;
 
 /**
  * An implementation of the {@link LoadBalancer} that assigns favored nodes for
@@ -52,12 +56,12 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
   private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
 
   private FavoredNodes globalFavoredNodesAssignmentPlan;
-  private Configuration configuration;
+  private RackManager rackManager;
 
   @Override
   public void setConf(Configuration conf) {
-    this.configuration = conf;
     globalFavoredNodesAssignmentPlan = new FavoredNodes();
+    this.rackManager = new RackManager(conf);
   }
 
   @Override
@@ -76,13 +80,45 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
     Map<ServerName, List<HRegionInfo>> assignmentMap;
     try {
       FavoredNodeAssignmentHelper assignmentHelper =
-          new FavoredNodeAssignmentHelper(servers, configuration);
+          new FavoredNodeAssignmentHelper(servers, rackManager);
       assignmentHelper.initialize();
       if (!assignmentHelper.canPlaceFavoredNodes()) {
         return super.roundRobinAssignment(regions, servers);
       }
+      // Segregate the regions into two types:
+      // 1. The regions that have favored node assignment, and where at least
+      //    one of the favored nodes is still alive. In this case, try to adhere
+      //    to the current favored nodes assignment as much as possible - i.e.,
+      //    if the current primary is gone, then make the secondary or tertiary
+      //    the new host for the region. Note that we don't change the favored
+      //    node assignments here (even though one or more favored nodes are
+      //    currently down). It is up to the balanceCluster to do this hard work.
+      //    HDFS can handle the fact that some nodes in the favored nodes hint are
+      //    down; it'd allocate some other DNs. In combination with stale settings
+      //    for HDFS, we should be just fine.
+      // 2. The regions that currently don't have favored node assignment. We will
+      //    need to come up with favored nodes assignments for them. The corner case
+      //    in (1) above is that all the nodes are unavailable and in that case, we
+      //    will note that this region doesn't have favored nodes.
+      Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>> segregatedRegions =
+          segregateRegions(regions, servers);
+      Map<ServerName,List<HRegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
+      List<HRegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
       assignmentMap = new HashMap<ServerName, List<HRegionInfo>>();
-      roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regions, servers);
+      roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes,
+          servers);
+      // Merge the assignment maps. A plain putAll would replace the list that
+      // roundRobinAssignmentImpl built for a server present in both maps and so
+      // drop those regions from the plan; append to the existing list instead.
+      for (Map.Entry<ServerName, List<HRegionInfo>> entry :
+          regionsWithFavoredNodesMap.entrySet()) {
+        List<HRegionInfo> regionsOnServer = assignmentMap.get(entry.getKey());
+        if (regionsOnServer == null) {
+          assignmentMap.put(entry.getKey(), entry.getValue());
+        } else {
+          regionsOnServer.addAll(entry.getValue());
+        }
+      }
     } catch (Exception ex) {
       LOG.warn("Encountered exception while doing favored-nodes assignment " + ex +
           " Falling back to regular assignment");
@@ -95,12 +131,23 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
     try {
       FavoredNodeAssignmentHelper assignmentHelper =
-          new FavoredNodeAssignmentHelper(servers, configuration);
+          new FavoredNodeAssignmentHelper(servers, rackManager);
       assignmentHelper.initialize();
       ServerName primary = super.randomAssignment(regionInfo, servers);
       if (!assignmentHelper.canPlaceFavoredNodes()) {
         return primary;
       }
+      List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
+      // check if we have a favored nodes mapping for this region and if so, return
+      // a server from the favored nodes list if the passed 'servers' contains this
+      // server as well (available servers, that is)
+      if (favoredNodes != null) {
+        for (ServerName s : favoredNodes) {
+          if (servers.contains(s)) {
+            return s;
+          }
+        }
+      }
       List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
       regions.add(regionInfo);
       Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1);
@@ -114,6 +161,107 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
     }
   }
 
+  /**
+   * Splits the given regions into those that can go to one of their favored nodes
+   * that is still available, and those that have no available favored node.
+   *
+   * @param regions the regions to be assigned
+   * @param availableServers the servers a region may currently be assigned to
+   * @return a pair of (favored node to the regions placed on it, regions for which
+   *         favored nodes still have to be generated)
+   */
+  private Pair<Map<ServerName,List<HRegionInfo>>, List<HRegionInfo>>
+  segregateRegions(List<HRegionInfo> regions, List<ServerName> availableServers) {
+    Random rand = new Random();
+    Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes =
+        new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2);
+    List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2);
+
+    for (HRegionInfo region : regions) {
+      List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
+      ServerName primaryHost = null;
+      ServerName secondaryHost = null;
+      ServerName tertiaryHost = null;
+      if (favoredNodes != null) {
+        for (ServerName s : favoredNodes) {
+          if (availableServers.contains(s)) {
+            // getFavoredServerPosition returns null for a malformed favored nodes
+            // list, so compare with == rather than calling equals on the result
+            FavoredNodes.Position position =
+                FavoredNodes.getFavoredServerPosition(favoredNodes, s);
+            if (position == Position.PRIMARY) {
+              // in the current code this should never be true but might be in the
+              // future (if someone like the balancer changes the
+              // globalFavoredNodesAssignmentPlan between the time the method was
+              // called and this point).
+              primaryHost = s;
+              break;
+            } else if (position == Position.SECONDARY) {
+              secondaryHost = s;
+            } else if (position == Position.TERTIARY) {
+              tertiaryHost = s;
+            }
+          }
+        }
+      }
+      if (primaryHost != null) {
+        // keep the region in the returned plan; leaving it out of both results
+        // would mean the caller never assigns it
+        addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
+      } else if (secondaryHost != null || tertiaryHost != null) {
+        assignRegionToSecondaryOrTertiary(assignmentMapForFavoredNodes, region, secondaryHost,
+            tertiaryHost, rand);
+      } else {
+        // no favored nodes at all, or all of them are unavailable
+        regionsWithNoFavoredNodes.add(region);
+      }
+    }
+    return new Pair<Map<ServerName, List<HRegionInfo>>, List<HRegionInfo>>(
+        assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
+  }
+
+  /**
+   * Places the region on exactly one of its available non-primary favored nodes.
+   * When both the secondary and the tertiary are available, one of the two is
+   * chosen at random.
+   *
+   * @param assignmentMapForFavoredNodes the plan to add the region to
+   * @param region the region to place
+   * @param secondaryHost the available secondary favored node, or null
+   * @param tertiaryHost the available tertiary favored node, or null
+   * @param rand source of randomness for choosing between the two hosts
+   */
+  private void assignRegionToSecondaryOrTertiary(
+      Map<ServerName, List<HRegionInfo>> assignmentMapForFavoredNodes, HRegionInfo region,
+      ServerName secondaryHost, ServerName tertiaryHost, Random rand) {
+    ServerName host;
+    if (secondaryHost != null && tertiaryHost != null) {
+      // assign the region to a random one out of the two
+      // (both have the desired hdfs blocks)
+      host = rand.nextBoolean() ? secondaryHost : tertiaryHost;
+    } else if (secondaryHost != null) {
+      host = secondaryHost;
+    } else {
+      host = tertiaryHost;
+    }
+    if (host != null) {
+      addRegionToMap(assignmentMapForFavoredNodes, region, host);
+    }
+  }
+
+  /**
+   * Appends the region to the list kept for the host, creating the list if needed.
+   */
+  private void addRegionToMap(Map<ServerName, List<HRegionInfo>> assignmentMap,
+      HRegionInfo region, ServerName host) {
+    List<HRegionInfo> regionsOnServer = assignmentMap.get(host);
+    if (regionsOnServer == null) {
+      regionsOnServer = new ArrayList<HRegionInfo>();
+      assignmentMap.put(host, regionsOnServer);
+    }
+    regionsOnServer.add(region);
+  }
+
   public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
     return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java
index b59a408..4363834 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java
@@ -73,4 +73,18 @@ public class FavoredNodes {
   public synchronized List<ServerName> getFavoredNodes(HRegionInfo region) {
     return favoredNodesMap.get(region);
   }
+
+  public static Position getFavoredServerPosition(
+      List<ServerName> favoredNodes, ServerName server) {
+    if (favoredNodes == null || server == null ||
+        favoredNodes.size() != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
+      return null;
+    }
+    for (Position p : Position.values()) {
+      if (favoredNodes.get(p.ordinal()).equals(server)) {
+        return p;
+      }
+    }
+    return null;
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java
index 6b644b2..34bd8b6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java
@@ -28,6 +28,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
@@ -68,6 +69,9 @@ public class TestRegionPlacement {
   private int REGION_NUM = 10;
   private Map<HRegionInfo, ServerName[]> favoredNodesAssignmentPlan =
       new HashMap<HRegionInfo, ServerName[]>();
+  private final static int PRIMARY = Position.PRIMARY.ordinal();
+  private final static int SECONDARY = Position.SECONDARY.ordinal();
+  private final static int TERTIARY = Position.TERTIARY.ordinal();
 
   @BeforeClass
   public static void setupBeforeClass() throws Exception {
@@ -85,27 +89,89 @@ public class TestRegionPlacement {
   }
 
   @Test
-  public void testGetFavoredNodes() {
+  public void testFavoredNodesPresentForRoundRobinAssignment() {
     LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration());
-    HRegionInfo regionInfo = new HRegionInfo("oneregion".getBytes());
     List<ServerName> servers = new ArrayList<ServerName>();
     for (int i = 0; i < 10; i++) {
-      ServerName server = new ServerName("foo"+i+":1234",-1);
+      ServerName server = new ServerName("foo"+i+":1234", -1);
       servers.add(server);
     }
-    // test that we have enough favored nodes after we call randomAssignment
-    balancer.randomAssignment(regionInfo, servers);
-    assertTrue(((FavoredNodeLoadBalancer)balancer).getFavoredNodes(regionInfo).size() == 3);
-    List<HRegionInfo> regions = new ArrayList<HRegionInfo>(100);
-    for (int i = 0; i < 100; i++) {
-      HRegionInfo region = new HRegionInfo(("foobar"+i).getBytes());
-      regions.add(region);
-    }
-    // test that we have enough favored nodes after we call roundRobinAssignment
-    balancer.roundRobinAssignment(regions, servers);
-    for (int i = 0; i < 100; i++) {
-      assertTrue(((FavoredNodeLoadBalancer)balancer).getFavoredNodes(regions.get(i)).size() == 3);
+    List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
+    HRegionInfo region = new HRegionInfo(("foobar").getBytes());
+    regions.add(region);
+    Map<ServerName,List<HRegionInfo>> assignmentMap = balancer.roundRobinAssignment(regions,
+        servers);
+    Set<ServerName> serverBefore = assignmentMap.keySet();
+    List<ServerName> favoredNodesBefore =
+        ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
+    assertTrue(favoredNodesBefore.size() == 3);
+    // the primary RS should be the one that the balancer's assignment returns
+    assertTrue(serverBefore.contains(favoredNodesBefore.get(PRIMARY)));
+    servers.remove(favoredNodesBefore.get(PRIMARY));
+    assignmentMap = balancer.roundRobinAssignment(regions, servers);
+    List<ServerName> favoredNodesAfter =
+        ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
+    assertTrue(favoredNodesAfter.size() == 3);
+    // We don't expect the favored nodes assignments to change in multiple calls
+    // to the roundRobinAssignment method in the balancer (relevant for AssignmentManager.assign
+    // failures)
+    assertTrue(favoredNodesAfter.containsAll(favoredNodesBefore));
+    Set<ServerName> serverAfter = assignmentMap.keySet();
+    // We expect the new RegionServer assignee to be one of the favored nodes
+    // chosen earlier.
+    assertTrue(serverAfter.contains(favoredNodesBefore.get(SECONDARY)) ||
+        serverAfter.contains(favoredNodesBefore.get(TERTIARY)));
+    // Make all the favored nodes unavailable
+    servers.removeAll(favoredNodesAfter);
+    assignmentMap = balancer.roundRobinAssignment(regions, servers);
+    List<ServerName> favoredNodesNow =
+        ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
+    assertTrue(favoredNodesNow.size() == 3);
+    assertTrue(!favoredNodesNow.contains(favoredNodesAfter.get(PRIMARY)) &&
+        !favoredNodesNow.contains(favoredNodesAfter.get(SECONDARY)) &&
+        !favoredNodesNow.contains(favoredNodesAfter.get(TERTIARY)));
+  }
+
+  @Test
+  public void testFavoredNodesPresentForRandomAssignment() {
+    LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration());
+    List<ServerName> servers = new ArrayList<ServerName>();
+    for (int i = 0; i < 10; i++) {
+      ServerName server = new ServerName("foo"+i+":1234", -1);
+      servers.add(server);
     }
+    List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
+    HRegionInfo region = new HRegionInfo(("foobar").getBytes());
+    regions.add(region);
+    ServerName serverBefore = balancer.randomAssignment(region, servers);
+    List<ServerName> favoredNodesBefore =
+        ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
+    assertTrue(favoredNodesBefore.size() == 3);
+    // the primary RS should be the one that the balancer's assignment returns
+    assertTrue(serverBefore.equals(favoredNodesBefore.get(PRIMARY)));
+    servers.remove(serverBefore);
+    ServerName serverAfter = balancer.randomAssignment(region, servers);
+    List<ServerName> favoredNodesAfter =
+        ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
+    assertTrue(favoredNodesAfter.size() == 3);
+    // We don't expect the favored nodes assignments to change in multiple calls
+    // to the randomAssignment method in the balancer (relevant for AssignmentManager.assign
+    // failures)
+    assertTrue(favoredNodesAfter.containsAll(favoredNodesBefore));
+    // We expect the new RegionServer assignee to be one of the favored nodes
+    // chosen earlier.
+    assertTrue(serverAfter.equals(favoredNodesBefore.get(SECONDARY)) ||
+        serverAfter.equals(favoredNodesBefore.get(TERTIARY)));
+    // Make all the favored nodes unavailable
+    servers.removeAll(favoredNodesAfter);
+    ServerName serverNow = balancer.randomAssignment(region, servers);
+    List<ServerName> favoredNodesNow =
+        ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
+    assertTrue(serverNow.equals(favoredNodesNow.get(PRIMARY)));
+    assertTrue(favoredNodesNow.size() == 3);
+    assertTrue(!favoredNodesNow.contains(favoredNodesAfter.get(PRIMARY)) &&
+        !favoredNodesNow.contains(favoredNodesAfter.get(SECONDARY)) &&
+        !favoredNodesNow.contains(favoredNodesAfter.get(TERTIARY)));
   }
 
   @Test(timeout = 180000)
@@ -204,8 +270,6 @@ public class TestRegionPlacement {
           HRegionInfo info = MetaScanner.getHRegionInfo(result);
           byte[] server = result.getValue(HConstants.CATALOG_FAMILY,
               HConstants.SERVER_QUALIFIER);
-          byte[] startCode = result.getValue(HConstants.CATALOG_FAMILY,
-              HConstants.STARTCODE_QUALIFIER);
           byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY,
               FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER);
           // Add the favored nodes into assignment plan
@@ -218,7 +282,7 @@ public class TestRegionPlacement {
           totalRegionNum.incrementAndGet();
           if (server != null) {
             ServerName serverName =
-                new ServerName(Bytes.toString(server),Bytes.toLong(startCode));
+                new ServerName(Bytes.toString(server), -1);
             if (favoredNodes != null) {
               String placement = "[NOT FAVORED NODE]";
               for (int i = 0; i < favoredServerList.length; i++) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java
index 843331c..d6f5fd9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java
@@ -245,10 +245,9 @@ public class TestFavoredNodeAssignmentHelper {
     List<ServerName> servers = getServersFromRack(rackToServerCount);
     FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers,
         new Configuration());
-    helper = new FavoredNodeAssignmentHelper(servers, new Configuration());
+    helper = new FavoredNodeAssignmentHelper(servers, rackManager);
     Map<ServerName, List<HRegionInfo>> assignmentMap =
         new HashMap<ServerName, List<HRegionInfo>>();
-    helper.setRackManager(rackManager);
     helper.initialize();
     // create regions
     List<HRegionInfo> regions = new ArrayList<HRegionInfo>(regionCount);
@@ -269,8 +268,7 @@ public class TestFavoredNodeAssignmentHelper {
     rackToServerCount.put("rack3", 10);
     List<ServerName> servers = getServersFromRack(rackToServerCount);
     FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers,
-        new Configuration());
-    helper.setRackManager(rackManager);
+        rackManager);
     helper.initialize();
     assertTrue(helper.canPlaceFavoredNodes());
 