diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java index e0b8153..36ba49b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java @@ -72,19 +72,19 @@ public class FavoredNodeAssignmentHelper { public final static short FAVORED_NODES_NUM = 3; public FavoredNodeAssignmentHelper(final List servers, Configuration conf) { + this(servers, new RackManager(conf)); + } + + public FavoredNodeAssignmentHelper(final List servers, + final RackManager rackManager) { this.servers = servers; - this.rackManager = new RackManager(conf); + this.rackManager = rackManager; this.rackToRegionServerMap = new HashMap>(); this.regionServerToRackMap = new HashMap(); this.uniqueRackList = new ArrayList(); this.random = new Random(); } - // For unit tests - void setRackManager(RackManager rackManager) { - this.rackManager = rackManager; - } - /** * Perform full scan of the meta table similar to * {@link MetaReader#fullScan(CatalogTracker, Set, boolean)} except that this is diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java index 6309779..53d84b7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java @@ -30,9 +30,13 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerLoad; import 
org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.master.LoadBalancer; +import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.master.balancer.FavoredNodes.Position; +import org.apache.hadoop.hbase.util.Pair; /** * An implementation of the {@link LoadBalancer} that assigns favored nodes for @@ -52,12 +56,12 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class); private FavoredNodes globalFavoredNodesAssignmentPlan; - private Configuration configuration; + private RackManager rackManager; @Override public void setConf(Configuration conf) { - this.configuration = conf; globalFavoredNodesAssignmentPlan = new FavoredNodes(); + this.rackManager = new RackManager(conf); } @Override @@ -76,13 +80,36 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { Map> assignmentMap; try { FavoredNodeAssignmentHelper assignmentHelper = - new FavoredNodeAssignmentHelper(servers, configuration); + new FavoredNodeAssignmentHelper(servers, rackManager); assignmentHelper.initialize(); if (!assignmentHelper.canPlaceFavoredNodes()) { return super.roundRobinAssignment(regions, servers); } + // Segregate the regions into two types: + // 1. The regions that have favored node assignment, and where at least + // one of the favored node is still alive. In this case, try to adhere + // to the current favored nodes assignment as much as possible - i.e., + // if the current primary is gone, then make the secondary or tertiary + // as the new host for the region (based on their current load). + // Note that we don't change the favored + // node assignments here (even though one or more favored node is currently + // down). It is up to the balanceCluster to do this hard work. The HDFS + // can handle the fact that some nodes in the favored nodes hint is down + // It'd allocate some other DNs. 
In combination with stale settings for HDFS, + // we should be just fine. + // 2. The regions that currently don't have favored node assignment. We will + // need to come up with favored nodes assignments for them. The corner case + // in (1) above is that all the nodes are unavailable and in that case, we + // will note that this region doesn't have favored nodes. + Pair>, List> segregatedRegions = + segregateRegions(regions, servers); + Map> regionsWithFavoredNodesMap = segregatedRegions.getFirst(); + List regionsWithNoFavoredNodes = segregatedRegions.getSecond(); assignmentMap = new HashMap>(); - roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regions, servers); + roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes, + servers); + // merge the assignment maps + assignmentMap.putAll(regionsWithFavoredNodesMap); } catch (Exception ex) { LOG.warn("Encountered exception while doing favored-nodes assignment " + ex + " Falling back to regular assignment"); @@ -95,12 +122,23 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { public ServerName randomAssignment(HRegionInfo regionInfo, List servers) { try { FavoredNodeAssignmentHelper assignmentHelper = - new FavoredNodeAssignmentHelper(servers, configuration); + new FavoredNodeAssignmentHelper(servers, rackManager); assignmentHelper.initialize(); ServerName primary = super.randomAssignment(regionInfo, servers); if (!assignmentHelper.canPlaceFavoredNodes()) { return primary; } + List favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); + // check if we have a favored nodes mapping for this region and if so, return + // a server from the favored nodes list if the passed 'servers' contains this + // server as well (available servers, that is) + if (favoredNodes != null) { + for (ServerName s : favoredNodes) { + if (availableServersContains(servers, s)) { + return s; + } + } + } List regions = new ArrayList(1); regions.add(regionInfo); Map 
primaryRSMap = new HashMap(1); @@ -114,6 +152,97 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { } } + private Pair>, List> + segregateRegions(List regions, List availableServers) { + Map> assignmentMapForFavoredNodes = + new HashMap>(regions.size() / 2); + List regionsWithNoFavoredNodes = new ArrayList(regions.size()/2); + for (HRegionInfo region : regions) { + List favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region); + ServerName primaryHost = null; + ServerName secondaryHost = null; + ServerName tertiaryHost = null; + if (favoredNodes != null) { + for (ServerName s : favoredNodes) { + if (availableServersContains(availableServers, s)) { + FavoredNodes.Position position = + FavoredNodes.getFavoredServerPosition(favoredNodes, s); + if (position.equals(Position.PRIMARY)) { + // in the current code this should never be true but might be in the + // future (if someone like the balancer changes the globalFavoredNodesAssignmentPlan + // between the time the method was called and this point). 
+ primaryHost = s; + break; + } else if (position.equals(Position.SECONDARY)) { + secondaryHost = s; + } else if (position.equals(Position.TERTIARY)) { + tertiaryHost = s; + } + } + } + if (primaryHost != null) { + continue; // we already have the region assigned to the primary; do nothing + } + + assignRegionToSecondaryOrTertiary(assignmentMapForFavoredNodes, region, secondaryHost, + tertiaryHost); + } + if (secondaryHost == null && tertiaryHost == null) { //all favored nodes unavailable + regionsWithNoFavoredNodes.add(region); + } + } + return new Pair>, List>( + assignmentMapForFavoredNodes, regionsWithNoFavoredNodes); + } + + private boolean availableServersContains(List servers, ServerName favoredNode) { + for (ServerName server : servers) { + if (ServerName.isSameHostnameAndPort(favoredNode, server)) { + return true; + } + } + return false; + } + + private void assignRegionToSecondaryOrTertiary(Map> assignmentMapForFavoredNodes, HRegionInfo region, + ServerName secondaryHost, ServerName tertiaryHost) { + if (secondaryHost != null && tertiaryHost != null) { + // assign the region to the one with a lower load + // (both have the desired hdfs blocks) + ServerName s; + ServerLoad tertiaryLoad = super.services.getServerManager().getLoad(tertiaryHost); + ServerLoad secondaryLoad = super.services.getServerManager().getLoad(secondaryHost); + if (secondaryLoad.getLoad() < tertiaryLoad.getLoad()) { + s = secondaryHost; + } else { + s = tertiaryHost; + } + List regionsOnServer = null; + if ((regionsOnServer = assignmentMapForFavoredNodes.get(s)) == null) { + regionsOnServer = new ArrayList(); + assignmentMapForFavoredNodes.put(s, regionsOnServer); + } + regionsOnServer.add(region); + } + else if (secondaryHost != null) { + List regionsOnServer = null; + if ((regionsOnServer = assignmentMapForFavoredNodes.get(secondaryHost)) == null) { + regionsOnServer = new ArrayList(); + assignmentMapForFavoredNodes.put(secondaryHost, regionsOnServer); + } + regionsOnServer.add(region); + 
} + else if (tertiaryHost != null) { + List regionsOnServer = null; + if ((regionsOnServer = assignmentMapForFavoredNodes.get(tertiaryHost)) == null) { + regionsOnServer = new ArrayList(); + assignmentMapForFavoredNodes.put(tertiaryHost, regionsOnServer); + } + regionsOnServer.add(region); + } + } + public List getFavoredNodes(HRegionInfo regionInfo) { return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); } @@ -135,12 +264,18 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap); // now record all the assignments so that we can serve queries later for (HRegionInfo region : regions) { + // Store the favored nodes without startCode for the ServerName objects + // We don't care about the startcode; but only the hostname really List favoredNodesForRegion = new ArrayList(3); - favoredNodesForRegion.add(primaryRSMap.get(region)); + ServerName sn = primaryRSMap.get(region); + favoredNodesForRegion.add(new ServerName(sn.getHostname(), sn.getPort(), + ServerName.NON_STARTCODE)); ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region); if (secondaryAndTertiaryNodes != null) { - favoredNodesForRegion.add(secondaryAndTertiaryNodes[0]); - favoredNodesForRegion.add(secondaryAndTertiaryNodes[1]); + favoredNodesForRegion.add(new ServerName(secondaryAndTertiaryNodes[0].getHostname(), + secondaryAndTertiaryNodes[0].getPort(), ServerName.NON_STARTCODE)); + favoredNodesForRegion.add(new ServerName(secondaryAndTertiaryNodes[1].getHostname(), + secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE)); } globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion); } @@ -148,6 +283,7 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { void noteFavoredNodes(final Map favoredNodesMap) { for (Map.Entry entry : favoredNodesMap.entrySet()) { + // the META should already have favorednode ServerName objects without startcode 
globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(entry.getKey(), Arrays.asList(entry.getValue())); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java index b59a408..4363834 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java @@ -73,4 +73,18 @@ public class FavoredNodes { public synchronized List getFavoredNodes(HRegionInfo region) { return favoredNodesMap.get(region); } + + public static Position getFavoredServerPosition( + List favoredNodes, ServerName server) { + if (favoredNodes == null || server == null || + favoredNodes.size() != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) { + return null; + } + for (Position p : Position.values()) { + if (favoredNodes.get(p.ordinal()).equals(server)) { + return p; + } + } + return null; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java index 6b644b2..f151730 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java @@ -28,6 +28,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -68,6 +69,9 @@ public class TestRegionPlacement { private int REGION_NUM = 10; private Map favoredNodesAssignmentPlan = new HashMap(); + private final static int PRIMARY = Position.PRIMARY.ordinal(); + private final static int SECONDARY = Position.SECONDARY.ordinal(); + private final static int TERTIARY = Position.TERTIARY.ordinal(); @BeforeClass 
public static void setupBeforeClass() throws Exception { @@ -85,27 +89,91 @@ public class TestRegionPlacement { } @Test - public void testGetFavoredNodes() { + public void testFavoredNodesPresentForRoundRobinAssignment() { LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration()); - HRegionInfo regionInfo = new HRegionInfo("oneregion".getBytes()); List servers = new ArrayList(); for (int i = 0; i < 10; i++) { - ServerName server = new ServerName("foo"+i+":1234",-1); + ServerName server = new ServerName("foo"+i+":1234", -1); servers.add(server); } - // test that we have enough favored nodes after we call randomAssignment - balancer.randomAssignment(regionInfo, servers); - assertTrue(((FavoredNodeLoadBalancer)balancer).getFavoredNodes(regionInfo).size() == 3); - List regions = new ArrayList(100); - for (int i = 0; i < 100; i++) { - HRegionInfo region = new HRegionInfo(("foobar"+i).getBytes()); - regions.add(region); - } - // test that we have enough favored nodes after we call roundRobinAssignment - balancer.roundRobinAssignment(regions, servers); - for (int i = 0; i < 100; i++) { - assertTrue(((FavoredNodeLoadBalancer)balancer).getFavoredNodes(regions.get(i)).size() == 3); + List regions = new ArrayList(1); + HRegionInfo region = new HRegionInfo(("foobar").getBytes()); + regions.add(region); + Map> assignmentMap = balancer.roundRobinAssignment(regions, + servers); + Set serverBefore = assignmentMap.keySet(); + List favoredNodesBefore = + ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region); + assertTrue(favoredNodesBefore.size() == 3); + // the primary RS should be the one that the balancer's assignment returns + assertTrue(ServerName.isSameHostnameAndPort(serverBefore.iterator().next(), + favoredNodesBefore.get(PRIMARY))); + servers.remove(favoredNodesBefore.get(PRIMARY)); + assignmentMap = balancer.roundRobinAssignment(regions, servers); + List favoredNodesAfter = + ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region); 
+ assertTrue(favoredNodesAfter.size() == 3); + // We don't expect the favored nodes assignments to change in multiple calls + // to the roundRobinAssignment method in the balancer (relevant for AssignmentManager.assign + // failures) + assertTrue(favoredNodesAfter.containsAll(favoredNodesBefore)); + Set serverAfter = assignmentMap.keySet(); + // We expect the new RegionServer assignee to be one of the favored nodes + // chosen earlier. + assertTrue(ServerName.isSameHostnameAndPort(serverAfter.iterator().next(), + favoredNodesBefore.get(SECONDARY)) || + ServerName.isSameHostnameAndPort(serverAfter.iterator().next(), + favoredNodesBefore.get(TERTIARY))); + // Make all the favored nodes unavailable for assignment + servers.removeAll(favoredNodesAfter); + assignmentMap = balancer.roundRobinAssignment(regions, servers); + List favoredNodesNow = + ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region); + assertTrue(favoredNodesNow.size() == 3); + assertTrue(!favoredNodesNow.contains(favoredNodesAfter.get(PRIMARY)) && + !favoredNodesNow.contains(favoredNodesAfter.get(SECONDARY)) && + !favoredNodesNow.contains(favoredNodesAfter.get(TERTIARY))); + } + + @Test + public void testFavoredNodesPresentForRandomAssignment() { + LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration()); + List servers = new ArrayList(); + for (int i = 0; i < 10; i++) { + ServerName server = new ServerName("foo"+i+":1234", -1); + servers.add(server); } + List regions = new ArrayList(1); + HRegionInfo region = new HRegionInfo(("foobar").getBytes()); + regions.add(region); + ServerName serverBefore = balancer.randomAssignment(region, servers); + List favoredNodesBefore = + ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region); + assertTrue(favoredNodesBefore.size() == 3); + // the primary RS should be the one that the balancer's assignment returns + assertTrue(ServerName.isSameHostnameAndPort(serverBefore,favoredNodesBefore.get(PRIMARY))); + 
servers.remove(serverBefore); + ServerName serverAfter = balancer.randomAssignment(region, servers); + List favoredNodesAfter = + ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region); + assertTrue(favoredNodesAfter.size() == 3); + // We don't expect the favored nodes assignments to change in multiple calls + // to the randomAssignment method in the balancer (relevant for AssignmentManager.assign + // failures) + assertTrue(favoredNodesAfter.containsAll(favoredNodesBefore)); + // We expect the new RegionServer assignee to be one of the favored nodes + // chosen earlier. + assertTrue(ServerName.isSameHostnameAndPort(serverAfter, favoredNodesBefore.get(SECONDARY)) || + ServerName.isSameHostnameAndPort(serverAfter, favoredNodesBefore.get(TERTIARY))); + // Make all the favored nodes unavailable for assignment + servers.removeAll(favoredNodesAfter); + balancer.randomAssignment(region, servers); + List favoredNodesNow = + ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region); + assertTrue(favoredNodesNow.size() == 3); + assertTrue(!favoredNodesNow.contains(favoredNodesAfter.get(PRIMARY)) && + !favoredNodesNow.contains(favoredNodesAfter.get(SECONDARY)) && + !favoredNodesNow.contains(favoredNodesAfter.get(TERTIARY))); } @Test(timeout = 180000) @@ -204,8 +272,6 @@ public class TestRegionPlacement { HRegionInfo info = MetaScanner.getHRegionInfo(result); byte[] server = result.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER); - byte[] startCode = result.getValue(HConstants.CATALOG_FAMILY, - HConstants.STARTCODE_QUALIFIER); byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY, FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER); // Add the favored nodes into assignment plan @@ -218,7 +284,7 @@ public class TestRegionPlacement { totalRegionNum.incrementAndGet(); if (server != null) { ServerName serverName = - new ServerName(Bytes.toString(server),Bytes.toLong(startCode)); + new ServerName(Bytes.toString(server), -1); if (favoredNodes != 
null) { String placement = "[NOT FAVORED NODE]"; for (int i = 0; i < favoredServerList.length; i++) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java index 843331c..d6f5fd9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java @@ -245,10 +245,9 @@ public class TestFavoredNodeAssignmentHelper { List servers = getServersFromRack(rackToServerCount); FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, new Configuration()); - helper = new FavoredNodeAssignmentHelper(servers, new Configuration()); + helper = new FavoredNodeAssignmentHelper(servers, rackManager); Map> assignmentMap = new HashMap>(); - helper.setRackManager(rackManager); helper.initialize(); // create regions List regions = new ArrayList(regionCount); @@ -269,8 +268,7 @@ public class TestFavoredNodeAssignmentHelper { rackToServerCount.put("rack3", 10); List servers = getServersFromRack(rackToServerCount); FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, - new Configuration()); - helper.setRackManager(rackManager); + rackManager); helper.initialize(); assertTrue(helper.canPlaceFavoredNodes());