From d1db747ce815eeec3974adfedf89c7c8c81f7c05 Mon Sep 17 00:00:00 2001 From: thiruvel Date: Thu, 27 Oct 2016 14:58:27 -0700 Subject: [PATCH] HBASE-16941: FavoredNodes - Split/Merge code paths --- .../java/org/apache/hadoop/hbase/ServerName.java | 2 +- .../hadoop/hbase/StartcodeAgnosticServerName.java | 57 +++ .../hadoop/hbase/favored/FavoredNodesManager.java | 162 +++++++++ .../hadoop/hbase/master/AssignmentManager.java | 68 ++-- .../org/apache/hadoop/hbase/master/HMaster.java | 19 + .../apache/hadoop/hbase/master/MasterServices.java | 6 + .../apache/hadoop/hbase/master/ServerManager.java | 24 ++ .../master/SnapshotOfRegionAssignmentFromMeta.java | 68 +++- .../balancer/FavoredNodeAssignmentHelper.java | 400 +++++++++++++++------ .../master/balancer/FavoredNodeLoadBalancer.java | 121 ++++++- .../hbase/master/balancer/FavoredNodesPlan.java | 9 + .../master/balancer/FavoredNodesPromoter.java | 34 ++ .../hadoop/hbase/client/TestTableFavoredNodes.java | 210 +++++++++++ .../hbase/master/MockNoopMasterServices.java | 6 + .../hadoop/hbase/master/TestRegionPlacement2.java | 2 + .../balancer/TestFavoredNodeAssignmentHelper.java | 326 +++++++++++++++-- 16 files changed, 1333 insertions(+), 181 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/StartcodeAgnosticServerName.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodesManager.java create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPromoter.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java index 8d18db0..4ae500a 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ServerName.java @@ -98,7 +98,7 @@ import com.google.common.net.InetAddresses; private byte [] bytes; public static final List EMPTY_SERVER_LIST = new ArrayList(0); - private ServerName(final String hostname, final int port, final long startcode) { + protected ServerName(final String hostname, final int port, final long startcode) { // Drop the domain is there is one; no need of it in a local cluster. With it, we get long // unwieldy names. this.hostnameOnly = hostname; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/StartcodeAgnosticServerName.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/StartcodeAgnosticServerName.java new file mode 100644 index 0000000..9faafd9 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/StartcodeAgnosticServerName.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import com.google.common.net.HostAndPort; +import org.apache.hadoop.hbase.util.Addressing; + +public class StartcodeAgnosticServerName extends ServerName { + + public StartcodeAgnosticServerName(final String hostname, final int port, long startcode) { + super(hostname, port, startcode); + } + + public static StartcodeAgnosticServerName valueOf(final ServerName serverName) { + return new StartcodeAgnosticServerName(serverName.getHostname(), serverName.getPort(), + serverName.getStartcode()); + } + + public static StartcodeAgnosticServerName valueOf(final String hostnameAndPort, long startcode) { + return new StartcodeAgnosticServerName(Addressing.parseHostname(hostnameAndPort), + Addressing.parsePort(hostnameAndPort), startcode); + } + + public static StartcodeAgnosticServerName valueOf(final HostAndPort hostnameAndPort, long startcode) { + return new StartcodeAgnosticServerName(hostnameAndPort.getHostText(), + hostnameAndPort.getPort(), startcode); + } + + @Override + public int compareTo(ServerName other) { + int compare = this.getHostname().compareTo(other.getHostname()); + if (compare != 0) return compare; + compare = this.getPort() - other.getPort(); + if (compare != 0) return compare; + return 0; + } + + @Override + public int hashCode() { + return getHostAndPort().hashCode(); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodesManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodesManager.java new file mode 100644 index 0000000..6ca009e --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodesManager.java @@ -0,0 +1,162 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.favored; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta; +import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; +import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + +public class FavoredNodesManager { + + private static final Log LOG = LogFactory.getLog(FavoredNodesManager.class); + + private FavoredNodesPlan globalFavoredNodesAssignmentPlan; + private Map> primaryRSToRegionMap; + private Map> secondaryRSToRegionMap; + private Map> teritiaryRSToRegionMap; + + private MasterServices masterServices; + + public FavoredNodesManager(MasterServices masterServices) { + this.masterServices = masterServices; + this.globalFavoredNodesAssignmentPlan = new FavoredNodesPlan(); + this.primaryRSToRegionMap = new HashMap<>(); + this.secondaryRSToRegionMap = new HashMap<>(); + this.teritiaryRSToRegionMap = new HashMap<>(); + } + + public void initialize() throws HBaseIOException { + SnapshotOfRegionAssignmentFromMeta snaphotOfRegionAssignment = + new SnapshotOfRegionAssignmentFromMeta(masterServices.getConnection()); + try { + snaphotOfRegionAssignment.initialize(); + } catch (IOException e) { + throw new HBaseIOException(e); + } + globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan(); + primaryRSToRegionMap = snaphotOfRegionAssignment.getPrimaryToRegionInfoMap(); + secondaryRSToRegionMap = snaphotOfRegionAssignment.getSecondaryToRegionInfoMap(); + teritiaryRSToRegionMap = snaphotOfRegionAssignment.getTertiaryToRegionInfoMap(); + } + + public synchronized List getFavoredNodes(HRegionInfo regionInfo) { + return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); + } + + public synchronized void updateFavoredNodes(HRegionInfo regionInfo, List servers) + throws IOException { + + if (servers.size() != Sets.newHashSet(servers).size()) { + throw new IOException("Duplicates found: " + servers); + } + + if (regionInfo.isSystemTable()) { + LOG.debug("Ignoring favored node update for system table region :" + + regionInfo.getRegionNameAsString()); + return; + } + + if (servers.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) { + Map> regionToFavoredNodes = new HashMap<>(); + List serversWithNoStartCodes = Lists.newArrayList(); + for (ServerName sn : servers) { + if (sn.getStartcode() == ServerName.NON_STARTCODE) { + serversWithNoStartCodes.add(sn); + } else { + serversWithNoStartCodes.add(ServerName.valueOf(sn.getHostname(), sn.getPort(), + ServerName.NON_STARTCODE)); + } + } + regionToFavoredNodes.put(regionInfo, serversWithNoStartCodes); + FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo( + regionToFavoredNodes, + masterServices.getConnection()); + if (getFavoredNodes(regionInfo) != null) { + deleteFavoredNodesForRegion(Lists.newArrayList(regionInfo)); + } + globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(regionInfo, serversWithNoStartCodes); + addToReplicaLoad(regionInfo, serversWithNoStartCodes); + } else { + throw new IOException("At least " + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM + + " favored nodes should be present for region : " + regionInfo.getEncodedName() + + " current FN servers:" + servers); + } + } + + private synchronized void addToReplicaLoad(HRegionInfo hri, List servers) { + ServerName serverToUse = ServerName.valueOf(servers.get(0).getHostAndPort(), + ServerName.NON_STARTCODE); + List regionList = primaryRSToRegionMap.get(serverToUse); + if (regionList == null) { + regionList = new ArrayList<>(); + } + regionList.add(hri); + primaryRSToRegionMap.put(serverToUse, regionList); + + serverToUse = ServerName + .valueOf(servers.get(1).getHostAndPort(), ServerName.NON_STARTCODE); + regionList = secondaryRSToRegionMap.get(serverToUse); + if (regionList == null) { + regionList = new ArrayList<>(); + } + regionList.add(hri); + secondaryRSToRegionMap.put(serverToUse, regionList); + + serverToUse = ServerName.valueOf(servers.get(2).getHostAndPort(), ServerName.NON_STARTCODE); + regionList = teritiaryRSToRegionMap.get(serverToUse); + if (regionList == null) { + regionList = new ArrayList<>(); + } + regionList.add(hri); + teritiaryRSToRegionMap.put(serverToUse, regionList); + } + + public synchronized void deleteFavoredNodesForRegion(List regionInfoList) { + for (HRegionInfo hri : regionInfoList) { + List favNodes = getFavoredNodes(hri); + if (favNodes != null) { + if (primaryRSToRegionMap.containsKey(favNodes.get(0))) { + primaryRSToRegionMap.get(favNodes.get(0)).remove(hri); + } + if (secondaryRSToRegionMap.containsKey(favNodes.get(1))) { + secondaryRSToRegionMap.get(favNodes.get(1)).remove(hri); + } + if (teritiaryRSToRegionMap.containsKey(favNodes.get(2))) { + teritiaryRSToRegionMap.get(favNodes.get(2)).remove(hri); + } + globalFavoredNodesAssignmentPlan.removeFavoredNodes(hri); + } + } + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 82ae03f..aa0612b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -74,8 +74,7 @@ import org.apache.hadoop.hbase.ipc.FailedServerException; import org.apache.hadoop.hbase.ipc.RpcClient; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.RegionState.State; -import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; -import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer; +import org.apache.hadoop.hbase.master.balancer.FavoredNodesPromoter; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; @@ -230,10 +229,6 @@ public class AssignmentManager { this.regionsToReopen = Collections.synchronizedMap (new HashMap ()); Configuration conf = server.getConfiguration(); - // Only read favored nodes if using the favored nodes load balancer. - this.shouldAssignRegionsWithFavoredNodes = conf.getClass( - HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals( - FavoredNodeLoadBalancer.class); this.tableStateManager = tableStateManager; @@ -243,6 +238,8 @@ public class AssignmentManager { this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong( "hbase.meta.assignment.retry.sleeptime", 1000l); this.balancer = balancer; + // Only read favored nodes if using the favored nodes load balancer. + this.shouldAssignRegionsWithFavoredNodes = this.balancer instanceof FavoredNodesPromoter; int maxThreads = conf.getInt("hbase.assignment.threads.max", 30); this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool( @@ -630,23 +627,19 @@ public class AssignmentManager { } } - // TODO: processFavoredNodes might throw an exception, for e.g., if the - // meta could not be contacted/updated. We need to see how seriously to treat - // this problem as. Should we fail the current assignment. We should be able - // to recover from this problem eventually (if the meta couldn't be updated - // things should work normally and eventually get fixed up). - void processFavoredNodes(List regions) throws IOException { - if (!shouldAssignRegionsWithFavoredNodes) return; - // The AM gets the favored nodes info for each region and updates the meta - // table with that info - Map> regionToFavoredNodes = - new HashMap>(); - for (HRegionInfo region : regions) { - regionToFavoredNodes.put(region, - ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region)); + Map> processFavoredNodesForDaughters(HRegionInfo parent, + HRegionInfo regionA, HRegionInfo regionB) throws IOException { + List onlineServers = this.serverManager.getOnlineServersList(); + return ((FavoredNodesPromoter)this.balancer). + generateFavoredNodesForDaughter(onlineServers, parent, regionA, regionB); + } + + void processFavoredNodesForMerge(HRegionInfo merged, HRegionInfo regionA, HRegionInfo regionB) + throws IOException { + if (shouldAssignRegionsWithFavoredNodes) { + ((FavoredNodesPromoter)this.balancer). + generateFavoredNodesForMergedRegion(merged, regionA, regionB); } - FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, - this.server.getConnection()); } /** @@ -807,7 +800,7 @@ public class AssignmentManager { region, State.PENDING_OPEN, destination); List favoredNodes = ServerName.EMPTY_SERVER_LIST; if (this.shouldAssignRegionsWithFavoredNodes) { - favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region); + favoredNodes = server.getFavoredNodesManager().getFavoredNodes(region); } regionOpenInfos.add(new Pair>( region, favoredNodes)); @@ -1115,7 +1108,7 @@ public class AssignmentManager { try { List favoredNodes = ServerName.EMPTY_SERVER_LIST; if (this.shouldAssignRegionsWithFavoredNodes) { - favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region); + favoredNodes = server.getFavoredNodesManager().getFavoredNodes(region); } serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes); return; // we're done @@ -1300,15 +1293,6 @@ public class AssignmentManager { LOG.warn("Failed to create new plan.",ex); return null; } - if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) { - List regions = new ArrayList(1); - regions.add(region); - try { - processFavoredNodes(regions); - } catch (IOException ie) { - LOG.warn("Ignoring exception in processFavoredNodes " + ie); - } - } this.regionPlans.put(encodedName, randomPlan); } } @@ -1580,7 +1564,6 @@ public class AssignmentManager { processBogusAssignments(bulkPlan); - processFavoredNodes(regions); assign(regions.size(), servers.size(), "round-robin=true", bulkPlan); } @@ -1870,7 +1853,8 @@ public class AssignmentManager { } List favoredNodes = ServerName.EMPTY_SERVER_LIST; if (shouldAssignRegionsWithFavoredNodes) { - favoredNodes = ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(hri); + favoredNodes = + ((MasterServices)server).getFavoredNodesManager().getFavoredNodes(hri); } serverManager.sendRegionOpen(serverName, hri, favoredNodes); return; // we're done @@ -2398,6 +2382,7 @@ public class AssignmentManager { return hri.getShortNameToLog() + " is not splitting on " + serverName; } + Map> favoredNodes = null; final HRegionInfo a = HRegionInfo.convert(transition.getRegionInfo(1)); final HRegionInfo b = HRegionInfo.convert(transition.getRegionInfo(2)); RegionState rs_a = regionStates.getRegionState(a); @@ -2425,6 +2410,10 @@ public class AssignmentManager { try { regionStates.splitRegion(hri, a, b, serverName); + if (this.shouldAssignRegionsWithFavoredNodes) { + favoredNodes = processFavoredNodesForDaughters(hri, a ,b); + this.serverManager.sendFavoredNodes(serverName, favoredNodes); + } } catch (IOException ioe) { LOG.info("Failed to record split region " + hri.getShortNameToLog()); return "Failed to record the splitting in meta"; @@ -2627,6 +2616,15 @@ public class AssignmentManager { regionOffline(b, State.MERGED); regionOnline(hri, serverName, 1); + try { + if (this.shouldAssignRegionsWithFavoredNodes) { + processFavoredNodesForMerge(hri, a, b); + } + } catch (IOException e) { + LOG.error("Error while processing favored nodes after merge.", e); + return StringUtils.stringifyException(e); + } + // User could disable the table before master knows the new region. if (getTableStateManager().isTableState(hri.getTable(), TableState.State.DISABLED, TableState.State.DISABLING)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index eac2fa2..797eb0b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -83,6 +83,7 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.exceptions.MergeRegionException; import org.apache.hadoop.hbase.executor.ExecutorType; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; @@ -90,6 +91,7 @@ import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode; import org.apache.hadoop.hbase.master.balancer.BalancerChore; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; +import org.apache.hadoop.hbase.master.balancer.FavoredNodesPromoter; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.LogCleaner; @@ -507,6 +509,9 @@ public class HMaster extends HRegionServer implements MasterServices { return super.getFsTableDescriptors(); } + private boolean isFavoredNodesPromoter; + private FavoredNodesManager favoredNodesManager; + /** * For compatibility, if failed with regionserver credentials, try the master one */ @@ -736,6 +741,10 @@ public class HMaster extends HRegionServer implements MasterServices { this.initializationBeforeMetaAssignment = true; + if (this.balancer instanceof FavoredNodesPromoter) { + isFavoredNodesPromoter = true; + favoredNodesManager = new FavoredNodesManager(this); + } // Wait for regionserver to finish initialization. if (BaseLoadBalancer.tablesOnMaster(conf)) { waitForServerOnline(); @@ -758,6 +767,11 @@ public class HMaster extends HRegionServer implements MasterServices { // assigned when master is shutting down if (isStopped()) return; + //Initialize after meta as it scans meta + if (isFavoredNodesPromoter) { + favoredNodesManager.initialize(); + } + // migrating existent table state from zk, so splitters // and recovery process treat states properly. for (Map.Entry entry : ZKDataMigrator @@ -2860,4 +2874,9 @@ public class HMaster extends HRegionServer implements MasterServices { public LoadBalancer getLoadBalancer() { return balancer; } + + @Override + public FavoredNodesManager getFavoredNodesManager() { + return favoredNodesManager; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 9bdcf76..ec901d0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; @@ -379,4 +380,9 @@ public interface MasterServices extends Server { * @return True if this master is stopping. */ boolean isStopping(); + + /** + * @return Favored Nodes Manager + */ + public FavoredNodesManager getFavoredNodesManager(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 437c787..d9d84c4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -63,6 +63,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminServic import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ServerInfo; +import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds; import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest; @@ -1204,4 +1205,27 @@ public class ServerManager { removeRegion(hri); } } + + public void sendFavoredNodes(final ServerName server, + Map> favoredNodes) throws IOException { + AdminService.BlockingInterface admin = getRsAdmin(server); + if (admin == null) { + LOG.warn("Attempting to send favored nodes update rpc to server " + server.toString() + + " failed because no RPC connection found to this server"); + } else { + List>> regionUpdateInfos = + new ArrayList>>(); + for (Entry> entry : favoredNodes.entrySet()) { + regionUpdateInfos.add(new Pair>(entry.getKey(), + entry.getValue())); + } + UpdateFavoredNodesRequest request = + RequestConverter.buildUpdateFavoredNodesRequest(regionUpdateInfos); + try { + admin.updateFavoredNodes(null, request); + } catch (ServiceException se) { + throw ProtobufUtil.getRemoteException(se); + } + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java index 39beba8..6acae5c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SnapshotOfRegionAssignmentFromMeta.java @@ -66,7 +66,10 @@ public class SnapshotOfRegionAssignmentFromMeta { private final Map regionNameToRegionInfoMap; /** the regionServer to region map */ - private final Map> regionServerToRegionMap; + private final Map> currentRSToRegionMap; + private final Map> secondaryRSToRegionMap; + private final Map> teritiaryRSToRegionMap; + private final Map> primaryRSToRegionMap; /** the existing assignment plan in the hbase:meta region */ private final FavoredNodesPlan existingAssignmentPlan; private final Set disabledTables; @@ -81,7 +84,10 @@ public class SnapshotOfRegionAssignmentFromMeta { this.connection = connection; tableToRegionMap = new HashMap>(); regionToRegionServerMap = new HashMap(); - regionServerToRegionMap = new HashMap>(); + currentRSToRegionMap = new HashMap>(); + primaryRSToRegionMap = new HashMap>(); + secondaryRSToRegionMap = new HashMap>(); + teritiaryRSToRegionMap = new HashMap>(); regionNameToRegionInfoMap = new TreeMap(); existingAssignmentPlan = new FavoredNodesPlan(); this.disabledTables = disabledTables; @@ -122,6 +128,7 @@ public class SnapshotOfRegionAssignmentFromMeta { addRegion(hri); } + hri = rl.getRegionLocation(0).getRegionInfo(); // the code below is to handle favored nodes byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY, FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER); @@ -132,6 +139,15 @@ public class SnapshotOfRegionAssignmentFromMeta { // Add the favored nodes into assignment plan existingAssignmentPlan.updateFavoredNodesMap(hri, Arrays.asList(favoredServerList)); + for (int i = 0; i < favoredServerList.length; i++) { + if (i == 0) addPrimaryAssignment(hri, favoredServerList[i]); + if (i == 1) addSecondaryAssignment(hri, favoredServerList[i]); + if (i == 2) addTeritiaryAssignment(hri, favoredServerList[i]); + } + if (favoredServerList.length != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) { + LOG.warn("Insufficient favored nodes for region " + hri + " fn: " + Arrays + .toString(favoredServerList)); + } return true; } catch (RuntimeException e) { LOG.error("Catche remote exception " + e.getMessage() + @@ -169,12 +185,42 @@ public class SnapshotOfRegionAssignmentFromMeta { if (server == null) return; // Process the region server to region map - List regionList = regionServerToRegionMap.get(server); + List regionList = currentRSToRegionMap.get(server); if (regionList == null) { regionList = new ArrayList(); } regionList.add(regionInfo); - regionServerToRegionMap.put(server, regionList); + currentRSToRegionMap.put(server, regionList); + } + + private void addPrimaryAssignment(HRegionInfo regionInfo, ServerName server) { + // Process the region server to region map + List regionList = primaryRSToRegionMap.get(server); + if (regionList == null) { + regionList = new ArrayList(); + } + regionList.add(regionInfo); + primaryRSToRegionMap.put(server, regionList); + } + + private void addSecondaryAssignment(HRegionInfo regionInfo, ServerName server) { + // Process the region server to region map + List regionList = secondaryRSToRegionMap.get(server); + if (regionList == null) { + regionList = new ArrayList(); + } + regionList.add(regionInfo); + secondaryRSToRegionMap.put(server, regionList); + } + + private void addTeritiaryAssignment(HRegionInfo regionInfo, ServerName server) { + // Process the region server to region map + List regionList = teritiaryRSToRegionMap.get(server); + if (regionList == null) { + regionList = new ArrayList(); + } + regionList.add(regionInfo); + teritiaryRSToRegionMap.put(server, regionList); } /** @@ -206,7 +252,7 @@ public class SnapshotOfRegionAssignmentFromMeta { * @return regionserver to region map */ public Map> getRegionServerToRegionMap() { - return regionServerToRegionMap; + return currentRSToRegionMap; } /** @@ -224,4 +270,16 @@ public class SnapshotOfRegionAssignmentFromMeta { public Set getTableSet() { return this.tableToRegionMap.keySet(); } + + public Map> getSecondaryToRegionInfoMap() { + return this.secondaryRSToRegionMap; + } + + public Map> getTertiaryToRegionInfoMap() { + return this.teritiaryRSToRegionMap; + } + + public Map> getPrimaryToRegionInfoMap() { + return this.primaryRSToRegionMap; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java index 355339e..986fcec 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java @@ -29,13 +29,16 @@ import java.util.Map.Entry; import java.util.Random; import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.StartcodeAgnosticServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Connection; @@ -49,6 +52,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.FavoredNode import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; import com.google.protobuf.InvalidProtocolBufferException; /** @@ -64,11 +69,14 @@ public class FavoredNodeAssignmentHelper { private RackManager rackManager; private Map> rackToRegionServerMap; private List uniqueRackList; - private Map regionServerToRackMap; + // This map serves as a cache for rack to sn lookups. The num of + // region server entries might not match with that is in servers. + private Map regionServerToRackMap; private Random random; private List servers; public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("fn"); public final static short FAVORED_NODES_NUM = 3; + public final static short MAX_ATTEMPTS_FN_GENERATION = 10; public FavoredNodeAssignmentHelper(final List servers, Configuration conf) { this(servers, new RackManager(conf)); @@ -79,11 +87,33 @@ public class FavoredNodeAssignmentHelper { this.servers = servers; this.rackManager = rackManager; this.rackToRegionServerMap = new HashMap>(); - this.regionServerToRackMap = new HashMap(); + this.regionServerToRackMap = new HashMap(); this.uniqueRackList = new ArrayList(); this.random = new Random(); } + // Always initialize() when FavoredNodeAssignmentHelper is constructed. + public void initialize() { + for (ServerName sn : this.servers) { + String rackName = getRackOfServer(sn); + List serverList = this.rackToRegionServerMap.get(rackName); + if (serverList == null) { + serverList = Lists.newArrayList(); + // Add the current rack to the unique rack list + this.uniqueRackList.add(rackName); + this.rackToRegionServerMap.put(rackName, serverList); + } + for (ServerName serverName : serverList) { + if (ServerName.isSameHostnameAndPort(sn, serverName)) { + // The server is already present, ignore. + break; + } + } + serverList.add(StartcodeAgnosticServerName.valueOf(sn)); + this.regionServerToRackMap.put(sn.getHostname(), rackName); + } + } + /** * Update meta table with favored nodes info * @param regionToFavoredNodes map of HRegionInfo's to their favored nodes @@ -148,8 +178,8 @@ public class FavoredNodeAssignmentHelper { byte[] favoredNodes = getFavoredNodes(favoredNodeList); put.addImmutable(HConstants.CATALOG_FAMILY, FAVOREDNODES_QUALIFIER, EnvironmentEdgeManager.currentTime(), favoredNodes); - LOG.info("Create the region " + regionInfo.getRegionNameAsString() + - " with favored nodes " + Bytes.toString(favoredNodes)); + LOG.debug("Create the region " + regionInfo.getRegionNameAsString() + + " with favored nodes " + favoredNodeList); } return put; } @@ -180,7 +210,7 @@ public class FavoredNodeAssignmentHelper { HBaseProtos.ServerName.Builder b = HBaseProtos.ServerName.newBuilder(); b.setHostName(s.getHostname()); b.setPort(s.getPort()); - b.setStartCode(s.getStartcode()); + b.setStartCode(ServerName.NON_STARTCODE); f.addFavoredNode(b.build()); } return f.build().toByteArray(); @@ -234,12 +264,14 @@ public class FavoredNodeAssignmentHelper { // Place the current region with the current primary region server primaryRSMap.put(regionInfo, currentServer); - List regionsForServer = assignmentMap.get(currentServer); - if (regionsForServer == null) { - regionsForServer = new ArrayList(); - assignmentMap.put(currentServer, regionsForServer); + if (assignmentMap != null) { + List regionsForServer = assignmentMap.get(currentServer); + if (regionsForServer == null) { + regionsForServer = new ArrayList(); + assignmentMap.put(currentServer, regionsForServer); + } + regionsForServer.add(regionInfo); } - regionsForServer.add(regionInfo); // Set the next processing index if (numIterations % rackList.size() == 0) { @@ -263,7 +295,7 @@ public class FavoredNodeAssignmentHelper { // Create the secondary and tertiary region server pair object. ServerName[] favoredNodes; // Get the rack for the primary region server - String primaryRack = rackManager.getRack(primaryRS); + String primaryRack = getRackOfServer(primaryRS); if (getTotalNumberOfRacks() == 1) { favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack); @@ -319,7 +351,7 @@ public class FavoredNodeAssignmentHelper { ServerName primaryRS = entry.getValue(); try { // Get the rack for the primary region server - String primaryRack = rackManager.getRack(primaryRS); + String primaryRack = getRackOfServer(primaryRS); ServerName[] favoredNodes = null; if (getTotalNumberOfRacks() == 1) { // Single rack case: have to pick the secondary and tertiary @@ -370,10 +402,10 @@ public class FavoredNodeAssignmentHelper { for (HRegionInfo primary : primaries) { secondaryAndTertiary = secondaryAndTertiaryMap.get(primary); if (secondaryAndTertiary != null) { - if (regionServerToRackMap.get(secondaryAndTertiary[0]).equals(secondaryRack)) { + if (getRackOfServer(secondaryAndTertiary[0]).equals(secondaryRack)) { skipServerSet.add(secondaryAndTertiary[0]); } - if (regionServerToRackMap.get(secondaryAndTertiary[1]).equals(secondaryRack)) { + if (getRackOfServer(secondaryAndTertiary[1]).equals(secondaryRack)) { skipServerSet.add(secondaryAndTertiary[1]); } } @@ -440,7 +472,7 @@ public class FavoredNodeAssignmentHelper { // Single rack case: have to pick the secondary and tertiary // from the same rack List serverList = getServersFromRack(primaryRack); - if (serverList.size() <= 2) { + if ((serverList == null) || (serverList.size() <= 2)) { // Single region server case: cannot not place the favored nodes // on any server; return null; @@ -454,14 +486,10 @@ public class FavoredNodeAssignmentHelper { ServerName secondaryRS = getOneRandomServer(primaryRack, serverSkipSet); // Skip the secondary for the tertiary placement serverSkipSet.add(secondaryRS); - - // Place the tertiary RS - ServerName tertiaryRS = - getOneRandomServer(primaryRack, serverSkipSet); + ServerName tertiaryRS = getOneRandomServer(primaryRack, serverSkipSet); if (secondaryRS == null || tertiaryRS == null) { - LOG.error("Cannot place the secondary and terinary" + - "region server for region " + + LOG.error("Cannot place the secondary, tertiary favored node for region " + regionInfo.getRegionNameAsString()); } // Create the secondary and tertiary pair @@ -472,80 +500,57 @@ public class FavoredNodeAssignmentHelper { } } - private ServerName[] multiRackCase(HRegionInfo regionInfo, - ServerName primaryRS, + /** + * Place secondary and tertiary nodes in a multi rack case. + * If there are only two racks, then we try the place the secondary + * and tertiary on different rack than primary. But if the other rack has + * only one region server, then we place primary and tertiary on one rack + * and secondary on another. The aim is two distribute the three favored nodes + * on >= 2 racks. + * TODO: see how we can use generateMissingFavoredNodeMultiRack API here + * @param regionInfo Region for which we are trying to generate FN + * @param primaryRS The primary favored node. + * @param primaryRack The racj of the primary favored node. + * @return Array containing secondary and tertiary favored nodes. + * @throws IOException Signals that an I/O exception has occurred. + */ + private ServerName[] multiRackCase(HRegionInfo regionInfo, ServerName primaryRS, String primaryRack) throws IOException { - // Random to choose the secondary and tertiary region server - // from another rack to place the secondary and tertiary + ListfavoredNodes = Lists.newArrayList(primaryRS); + // Create the secondary and tertiary pair + ServerName secondaryRS = generateMissingFavoredNodeMultiRack(favoredNodes); + favoredNodes.add(secondaryRS); + String secondaryRack = getRackOfServer(secondaryRS); - // Random to choose one rack except for the current rack - Set rackSkipSet = new HashSet(); - rackSkipSet.add(primaryRack); - ServerName[] favoredNodes = new ServerName[2]; - String secondaryRack = getOneRandomRack(rackSkipSet); - List serverList = getServersFromRack(secondaryRack); - if (serverList.size() >= 2) { - // Randomly pick up two servers from this secondary rack - - // Place the secondary RS - ServerName secondaryRS = getOneRandomServer(secondaryRack); - - // Skip the secondary for the tertiary placement - Set skipServerSet = new HashSet(); - skipServerSet.add(secondaryRS); - // Place the tertiary RS - ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet); - - if (secondaryRS == null || tertiaryRS == null) { - LOG.error("Cannot place the secondary and terinary" + - "region server for region " + - regionInfo.getRegionNameAsString()); - } - // Create the secondary and tertiary pair - favoredNodes[0] = secondaryRS; - favoredNodes[1] = tertiaryRS; + ServerName tertiaryRS; + if (primaryRack.equals(secondaryRack)) { + tertiaryRS = generateMissingFavoredNode(favoredNodes); } else { - // Pick the secondary rs from this secondary rack - // and pick the tertiary from another random rack - favoredNodes[0] = getOneRandomServer(secondaryRack); - - // Pick the tertiary - if (getTotalNumberOfRacks() == 2) { - // Pick the tertiary from the same rack of the primary RS - Set serverSkipSet = new HashSet(); - serverSkipSet.add(primaryRS); - favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet); - } else { - // Pick the tertiary from another rack - rackSkipSet.add(secondaryRack); - String tertiaryRandomRack = getOneRandomRack(rackSkipSet); - favoredNodes[1] = getOneRandomServer(tertiaryRandomRack); + // Try to place tertiary in secondary RS rack else place on primary rack. + String tertiaryRack = secondaryRack; + tertiaryRS = getOneRandomServer(tertiaryRack, Sets.newHashSet(secondaryRS)); + if (tertiaryRS == null) { + tertiaryRS = getOneRandomServer(primaryRack, Sets.newHashSet(primaryRS)); + } + if (tertiaryRS == null) { + tertiaryRS = generateMissingFavoredNode(Lists.newArrayList(primaryRS, secondaryRS)); } } - return favoredNodes; - } - boolean canPlaceFavoredNodes() { - int serverSize = this.regionServerToRackMap.size(); - return (serverSize >= FAVORED_NODES_NUM); + if (secondaryRS != null && tertiaryRS != null) { + ServerName[] result = { secondaryRS, tertiaryRS }; + return result; + } else { + throw new IOException("Primary RS = " + primaryRS + + "Secondary RS = " + (secondaryRS != null ? secondaryRS : "N/A") + + " Tertiary RS = " + (tertiaryRS != null ? tertiaryRS : "N/A") + + " could not place favored nodes"); + } } - public void initialize() { - for (ServerName sn : this.servers) { - String rackName = this.rackManager.getRack(sn); - List serverList = this.rackToRegionServerMap.get(rackName); - if (serverList == null) { - serverList = new ArrayList(); - // Add the current rack to the unique rack list - this.uniqueRackList.add(rackName); - } - if (!serverList.contains(sn)) { - serverList.add(sn); - this.rackToRegionServerMap.put(rackName, serverList); - this.regionServerToRackMap.put(sn, rackName); - } - } + boolean canPlaceFavoredNodes() { + return (this.servers.size() >= FAVORED_NODES_NUM); } private int getTotalNumberOfRacks() { @@ -556,31 +561,55 @@ public class FavoredNodeAssignmentHelper { return this.rackToRegionServerMap.get(rack); } - private ServerName getOneRandomServer(String rack, - Set skipServerSet) throws IOException { - if(rack == null) return null; - List serverList = this.rackToRegionServerMap.get(rack); - if (serverList == null) return null; + /** + * Gets a random server from the specified rack and skips anything specified. - // Get a random server except for any servers from the skip set - if (skipServerSet != null && serverList.size() <= skipServerSet.size()) { - throw new IOException("Cannot randomly pick another random server"); + * @param rack rack from a server is needed + * @param skipServerSet the server shouldn't belong to this set + */ + protected ServerName getOneRandomServer(String rack, Set skipServerSet) + throws IOException { + + // Is the rack valid? Do we recognize it? + if (rack == null || getServersFromRack(rack) == null || + getServersFromRack(rack).size() == 0) { + return null; } - ServerName randomServer; - do { - int randomIndex = random.nextInt(serverList.size()); - randomServer = serverList.get(randomIndex); - } while (skipServerSet != null && skipServerSet.contains(randomServer)); + // Lets use a set so we can eliminate duplicates + Set serversToChooseFrom = Sets.newHashSet(); + for (ServerName sn : getServersFromRack(rack)) { + serversToChooseFrom.add(StartcodeAgnosticServerName.valueOf(sn)); + } + + if (skipServerSet != null && skipServerSet.size() > 0) { + for (ServerName sn : skipServerSet) { + serversToChooseFrom.remove(StartcodeAgnosticServerName.valueOf(sn)); + } + // Do we have any servers left to choose from? + if (serversToChooseFrom.size() == 0) { + return null; + } + } - return randomServer; + ServerName randomServer = null; + int randomIndex = random.nextInt(serversToChooseFrom.size()); + int j = 0; + for (StartcodeAgnosticServerName sn : serversToChooseFrom) { + if (j == randomIndex) { + randomServer = sn; + break; + } + j++; + } + return ServerName.valueOf(randomServer.getHostAndPort(), randomServer.getStartcode()); } private ServerName getOneRandomServer(String rack) throws IOException { return this.getOneRandomServer(rack, null); } - private String getOneRandomRack(Set skipRackSet) throws IOException { + protected String getOneRandomRack(Set skipRackSet) throws IOException { if (skipRackSet == null || uniqueRackList.size() <= skipRackSet.size()) { throw new IOException("Cannot randomly pick another random server"); } @@ -603,4 +632,173 @@ public class FavoredNodeAssignmentHelper { } return strBuf.toString(); } + + /* + * Generates a missing favored node based on the input favored nodes. This helps to generate + * new FN when there is already 2 FN and we need a third one. For eg, while generating new FN + * for split daughters after inheriting 2 FN from the parent. If the cluster has only one rack + * it generates from the same rack. If the cluster has multiple racks, then it ensures the new + * FN respects the rack constraints similar to HDFS. For eg: if there are 3 FN, they will be + * spread across 2 racks. + */ + public ServerName generateMissingFavoredNode(List favoredNodes) throws IOException { + if (this.uniqueRackList.size() == 1) { + return generateMissingFavoredNodeSingleRack(favoredNodes, null); + } else { + return generateMissingFavoredNodeMultiRack(favoredNodes, null); + } + } + + public ServerName generateMissingFavoredNode(List favoredNodes, + List excludeNodes) throws IOException { + if (this.uniqueRackList.size() == 1) { + return generateMissingFavoredNodeSingleRack(favoredNodes, excludeNodes); + } else { + return generateMissingFavoredNodeMultiRack(favoredNodes, excludeNodes); + } + } + + /* + * Generate FN for a single rack scenario, don't generate from one of the excluded nodes. Helps + * when we would like to find a replacement node. + */ + private ServerName generateMissingFavoredNodeSingleRack(List favoredNodes, + List excludeNodes) throws IOException { + ServerName newServer = null; + Set excludeFNSet = Sets.newHashSet(favoredNodes); + if (excludeNodes != null && excludeNodes.size() > 0) { + excludeFNSet.addAll(excludeNodes); + } + if (favoredNodes.size() < FAVORED_NODES_NUM) { + newServer = this.getOneRandomServer(this.uniqueRackList.get(0), excludeFNSet); + } + return newServer; + } + + private ServerName generateMissingFavoredNodeMultiRack(List favoredNodes) + throws IOException { + return generateMissingFavoredNodeMultiRack(favoredNodes, null); + } + + /* + * Generates a missing FN based on the input favoredNodes and also the nodes to be skipped. + * + * Get the current layout of favored nodes arrangement and nodes to be excluded and get a + * random node that goes with HDFS block placement. Eg: If the existing nodes are on one rack, + * generate one from another rack. We exclude as much as possible so the random selection + * has more chance to generate a node within a few iterations, ideally 1. + */ + private ServerName generateMissingFavoredNodeMultiRack(List favoredNodes, + List excludeNodes) throws IOException { + + Set racks = Sets.newHashSet(); + Map> rackToFNMapping = new HashMap<>(); + + // Lets understand the current rack distribution of the FN + for (ServerName sn : favoredNodes) { + String rack = getRackOfServer(sn); + racks.add(rack); + + Set serversInRack = rackToFNMapping.get(rack); + if (serversInRack == null) { + serversInRack = Sets.newHashSet(); + rackToFNMapping.put(rack, serversInRack); + } + serversInRack.add(sn); + rackToFNMapping.get(rack).add(sn); + } + + // What racks should be skipped while getting a FN? + Set skipRackSet = Sets.newHashSet(); + + /* + * If both the FN are from the same rack, then we don't want to generate another FN on the + * same rack. If that rack fails, the region would be unavailable. + */ + if (racks.size() == 1 && favoredNodes.size() > 1) { + skipRackSet.add(racks.iterator().next()); + } + + /* + * If there are no free nodes on the existing racks, we should skip those racks too. We can + * reduce the number of iterations for FN selection. + */ + for (String rack : racks) { + if (getServersFromRack(rack) != null && + rackToFNMapping.get(rack).size() == getServersFromRack(rack).size()) { + skipRackSet.add(rack); + } + } + + Set favoredNodeSet = Sets.newHashSet(favoredNodes); + if (excludeNodes != null && excludeNodes.size() > 0) { + favoredNodeSet.addAll(excludeNodes); + } + + /* + * Lets get a random rack by excluding skipRackSet and generate a random FN from that rack. + */ + int i = 0; + Set randomRacks = Sets.newHashSet(); + ServerName newServer = null; + do { + String randomRack = this.getOneRandomRack(skipRackSet); + newServer = this.getOneRandomServer(randomRack, favoredNodeSet); + randomRacks.add(randomRack); + i++; + } while ((i < MAX_ATTEMPTS_FN_GENERATION) && (newServer == null)); + + if (newServer == null) { + if (LOG.isTraceEnabled()) { + LOG.trace(String.format("Unable to generate additional favored nodes for %s after " + + "considering racks %s and skip rack %s with a unique rack list of %s and rack " + + "to RS map of %s and RS to rack map of %s", + StringUtils.join(favoredNodes, ","), randomRacks, skipRackSet, uniqueRackList, + rackToRegionServerMap, regionServerToRackMap)); + } + throw new IOException(" Unable to generate additional favored nodes for " + + StringUtils.join(favoredNodes, ",")); + } + return newServer; + } + + /* + * Generate favored nodes for a region. + * + * Choose a random server as primary and then choose secondary and tertiary FN so its spread + * across two racks. + */ + List generateFavoredNodes(HRegionInfo hri) throws IOException { + + List favoredNodesForRegion = new ArrayList<>(FAVORED_NODES_NUM); + ServerName primary = servers.get(random.nextInt(servers.size())); + favoredNodesForRegion.add(ServerName.valueOf(primary.getHostAndPort(), ServerName.NON_STARTCODE)); + + Map primaryRSMap = new HashMap<>(1); + primaryRSMap.put(hri, primary); + Map secondaryAndTertiaryRSMap = + placeSecondaryAndTertiaryRS(primaryRSMap); + ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(hri); + if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) { + for (ServerName sn : secondaryAndTertiaryNodes) { + favoredNodesForRegion.add(ServerName.valueOf(sn.getHostAndPort(), ServerName.NON_STARTCODE)); + } + return favoredNodesForRegion; + } else { + throw new HBaseIOException("Unable to generate secondary and tertiary favored nodes."); + } + } + + /* + * Get the rack of server from local mapping when present, saves lookup by the RackManager. + */ + private String getRackOfServer(ServerName sn) { + if (this.regionServerToRackMap.containsKey(sn.getHostname())) { + return this.regionServerToRackMap.get(sn.getHostname()); + } else { + String rack = this.rackManager.getRack(sn); + this.regionServerToRackMap.put(sn.getHostname(), rack); + return rack; + } + } } \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java index 7e4fecf..717ca02 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java @@ -18,11 +18,16 @@ package org.apache.hadoop.hbase.master.balancer; +import static org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position.PRIMARY; +import static org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position.SECONDARY; +import static org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position.TERTIARY; + import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,17 +36,17 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.master.RackManager; -import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.master.ServerManager; -import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; +import org.apache.hadoop.hbase.master.*; import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position; import org.apache.hadoop.hbase.util.Pair; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + /** * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that * assigns favored nodes for each region. There is a Primary RegionServer that hosts @@ -57,16 +62,23 @@ import org.apache.hadoop.hbase.util.Pair; * */ @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) -public class FavoredNodeLoadBalancer extends BaseLoadBalancer { +public class FavoredNodeLoadBalancer extends BaseLoadBalancer implements FavoredNodesPromoter { private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class); - private FavoredNodesPlan globalFavoredNodesAssignmentPlan; private RackManager rackManager; + private Configuration conf; + private FavoredNodesManager fnm; @Override public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public synchronized void initialize() throws HBaseIOException { + super.initialize(); super.setConf(conf); - globalFavoredNodesAssignmentPlan = new FavoredNodesPlan(); + this.fnm = services.getFavoredNodesManager(); this.rackManager = new RackManager(conf); super.setConf(conf); } @@ -84,7 +96,6 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { LOG.warn("Not running balancer since exception was thrown " + ie); return plans; } - globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan(); Map serverNameToServerNameWithoutCode = new HashMap(); Map serverNameWithoutCodeToServerName = @@ -102,11 +113,10 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { currentServer.getPort(), ServerName.NON_STARTCODE); List list = entry.getValue(); for (HRegionInfo region : list) { - if(region.getTable().getNamespaceAsString() - .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) { + if(region.getTable().isSystemTable()) { continue; } - List favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region); + List favoredNodes = fnm.getFavoredNodes(region); if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) { continue; //either favorednodes does not exist or we are already on the primary node } @@ -201,7 +211,7 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { if (!assignmentHelper.canPlaceFavoredNodes()) { return primary; } - List favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); + List favoredNodes = fnm.getFavoredNodes(regionInfo); // check if we have a favored nodes mapping for this region and if so, return // a server from the favored nodes list if the passed 'servers' contains this // server as well (available servers, that is) @@ -233,7 +243,7 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { new HashMap>(regions.size() / 2); List regionsWithNoFavoredNodes = new ArrayList(regions.size()/2); for (HRegionInfo region : regions) { - List favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region); + List favoredNodes = fnm.getFavoredNodes(region); ServerName primaryHost = null; ServerName secondaryHost = null; ServerName tertiaryHost = null; @@ -310,13 +320,13 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { regionsOnServer.add(region); } - public List getFavoredNodes(HRegionInfo regionInfo) { - return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo); + public synchronized List getFavoredNodes(HRegionInfo regionInfo) { + return this.fnm.getFavoredNodes(regionInfo); } private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper, Map> assignmentMap, - List regions, List servers) { + List regions, List servers) throws IOException { Map primaryRSMap = new HashMap(); // figure the primary RSs assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions); @@ -325,7 +335,7 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { private void assignSecondaryAndTertiaryNodesForRegion( FavoredNodeAssignmentHelper assignmentHelper, - List regions, Map primaryRSMap) { + List regions, Map primaryRSMap) throws IOException { // figure the secondary and tertiary RSs Map secondaryAndTertiaryRSMap = assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap); @@ -344,10 +354,83 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer { favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(), secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE)); } - globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion); + fnm.updateFavoredNodes(region, favoredNodesForRegion); } } + /* + * Generate Favored Nodes for daughters during region split. + * + * If the parent does not have FN, regenerates them for the daughters. + * + * If the parent has FN, inherit two FN from parent for each daughter and generate the remaining. + * The primary FN for both the daughters should be the same as parent. Inherit the secondary + * FN from the parent but keep it different for each daughter. Choose the remaining FN + * randomly. This would give us better distribution over a period of time after enough splits. + */ + @Override + public Map> generateFavoredNodesForDaughter( + List servers, HRegionInfo parent, HRegionInfo regionA, HRegionInfo regionB) + throws IOException { + + Map> result = new HashMap<>(); + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + + List parentFavoredNodes = getFavoredNodes(parent); + if (parentFavoredNodes == null) { + LOG.debug("Unable to find favored nodes for parent, " + parent + + " generating new favored nodes for daughter"); + result.put(regionA, helper.generateFavoredNodes(regionA)); + result.put(regionB, helper.generateFavoredNodes(regionB)); + + } else { + + // Lets get the primary and secondary from parent for regionA + Set regionAFN = + getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY); + result.put(regionA, Lists.newArrayList(regionAFN)); + + // Lets get the primary and tertiary from parent for regionB + Set regionBFN = + getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY); + result.put(regionB, Lists.newArrayList(regionBFN)); + } + fnm.updateFavoredNodes(regionA, result.get(regionA)); + fnm.updateFavoredNodes(regionB, result.get(regionB)); + return result; + } + + private Set getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper, + List parentFavoredNodes, Position primary, Position secondary) + throws IOException { + + Set daughterFN = Sets.newHashSet(); + if (parentFavoredNodes.size() >= primary.ordinal()) { + daughterFN.add(parentFavoredNodes.get(primary.ordinal())); + } + + if (parentFavoredNodes.size() >= secondary.ordinal()) { + daughterFN.add(parentFavoredNodes.get(secondary.ordinal())); + } + + while (daughterFN.size() < FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) { + ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN)); + daughterFN.add(newNode); + } + return daughterFN; + } + + /* + * Generate favored nodes for a region during merge. Choose the FN from one of the sources to + * keep it simple. + */ + @Override + public void generateFavoredNodesForMergedRegion(HRegionInfo merged, HRegionInfo regionA, + HRegionInfo regionB) throws IOException { + fnm.updateFavoredNodes(merged, getFavoredNodes(regionA)); + } + @Override public List balanceCluster(TableName tableName, Map> clusterState) throws HBaseIOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java index 17be833..c4e3701 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPlan.java @@ -66,6 +66,15 @@ public class FavoredNodesPlan { } /** + * Remove a favored node assignment + * @param region region + * @return the list of favored region server for this region based on the plan + */ + public List removeFavoredNodes(HRegionInfo region) { + return favoredNodesMap.remove(region); + } + + /** * @param region * @return the list of favored region server for this region based on the plan */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPromoter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPromoter.java new file mode 100644 index 0000000..31116e8 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodesPromoter.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.balancer; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; + +public interface FavoredNodesPromoter { + + Map> generateFavoredNodesForDaughter(List servers, + HRegionInfo parent, HRegionInfo hriA, HRegionInfo hriB) throws IOException; + + void generateFavoredNodesForMergedRegion(HRegionInfo merged, HRegionInfo hriA, + HRegionInfo hriB) throws IOException; +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java new file mode 100644 index 0000000..cfe0591 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableFavoredNodes.java @@ -0,0 +1,210 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client; + +import java.io.IOException; +import java.util.List; + +import com.google.common.collect.Lists; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; +import org.apache.hadoop.hbase.master.LoadBalancer; +import org.apache.hadoop.hbase.master.ServerManager; +import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper; +import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.*; + +@Category({ClientTests.class, MediumTests.class}) +public class TestTableFavoredNodes { + + private static final Log LOG = LogFactory.getLog(TestTableFavoredNodes.class); + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private final static int WAIT_TIMEOUT = 60000; + private final static int SLAVES = 8; + private FavoredNodesManager fnm; + private Admin admin; + + private final byte[][] splitKeys = new byte[][] {Bytes.toBytes(1), Bytes.toBytes(9)}; + + @BeforeClass + public static void setupBeforeClass() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + // Setting FavoredNodeBalancer will enable favored nodes + conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, + FavoredNodeLoadBalancer.class, LoadBalancer.class); + conf.set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "" + SLAVES); + TEST_UTIL.startMiniCluster(SLAVES); + TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster(WAIT_TIMEOUT); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.cleanupTestDir(); + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void setup() throws IOException { + fnm = TEST_UTIL.getMiniHBaseCluster().getMaster().getFavoredNodesManager(); + admin = TEST_UTIL.getAdmin(); + admin.setBalancerRunning(false, true); + } + + /* + * Create a table with FN enabled and check if all its regions have favored nodes set. + */ + @Test + public void testCreateTable() throws Exception { + + TableName tableName = TableName.valueOf("createTable"); + TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys); + TEST_UTIL.waitUntilAllRegionsAssigned(tableName); + + // All regions should have favored nodes + checkIfAllRegionsHaveFavoredNodes(tableName); + + TEST_UTIL.deleteTable(tableName); + } + + /* + * Check if daughters inherit at-least 2 FN from parent after region split. + */ + @Test + public void testSplitTable() throws Exception { + + TableName tableName = TableName.valueOf("splitRegions"); + TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys); + TEST_UTIL.waitUntilAllRegionsAssigned(tableName); + int numRegions = admin.getTableRegions(tableName).size(); + + checkIfAllRegionsHaveFavoredNodes(tableName); + + byte[] splitPoint = Bytes.toBytes(0); + RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName); + HRegionInfo parent = locator.getRegionLocation(splitPoint).getRegionInfo(); + List parentFN = fnm.getFavoredNodes(parent); + assertNotNull(parentFN); + + admin.split(tableName, splitPoint); + + TEST_UTIL.waitUntilNoRegionsInTransition(WAIT_TIMEOUT); + TEST_UTIL.waitFor(60000, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + return admin.getTableRegions(tableName).size() == numRegions + 1; + } + }); + + // All regions should have favored nodes + checkIfAllRegionsHaveFavoredNodes(tableName); + + // Get the daughters of parent. + HRegionInfo daughter1 = locator.getRegionLocation(parent.getStartKey()).getRegionInfo(); + HRegionInfo daughter2 = locator.getRegionLocation(splitPoint).getRegionInfo(); + + checkIfDaughterInherits2FN(parentFN, fnm.getFavoredNodes(daughter1)); + checkIfDaughterInherits2FN(parentFN, fnm.getFavoredNodes(daughter2)); + + TEST_UTIL.deleteTable(tableName); + } + + private void checkIfAllRegionsHaveFavoredNodes(TableName tableName) throws IOException { + for (HRegionInfo regionInfo : TEST_UTIL.getAdmin().getTableRegions(tableName)) { + List fnList = fnm.getFavoredNodes(regionInfo); + assertNotNull("Favored nodes should not be null for region:" + regionInfo, fnList); + assertEquals("Incorrect favored nodes for region:" + regionInfo, + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, fnList.size()); + for (ServerName sn : fnList) { + assertEquals("FN should not have startCode, fnlist:" + fnList, -1, sn.getStartcode()); + } + } + } + + private void checkIfDaughterInherits2FN(List parentFN, + List daughterFN) { + + assertNotNull(parentFN); + assertNotNull(daughterFN); + + List favoredNodes = Lists.newArrayList(daughterFN); + favoredNodes.removeAll(parentFN); + + // With a small cluster its likely some FN might accidentally get shared. + // Its likely the 3rd FN the balancer chooses might still belong to the parent + assertTrue("Daughter FN:" + daughterFN + " should have inherited 2 FN from parent FN:" + + parentFN, favoredNodes.size() <= 1); + } + + /* + * Check if merged region inherits FN from one of its regions. + */ + @Test + public void testMergeTable() throws Exception { + + TableName tableName = TableName.valueOf("mergeRegions"); + TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys); + TEST_UTIL.waitUntilAllRegionsAssigned(tableName); + int numRegions = admin.getTableRegions(tableName).size(); + + checkIfAllRegionsHaveFavoredNodes(tableName); + + RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName); + HRegionInfo regionA = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo(); + HRegionInfo regionB = locator.getRegionLocation(Bytes.toBytes(1)).getRegionInfo(); + + List regionAFN = fnm.getFavoredNodes(regionA); + LOG.info("regionA: " + regionA.getEncodedName() + " with FN: " + fnm.getFavoredNodes(regionA)); + LOG.info("regionB: " + regionA.getEncodedName() + " with FN: " + fnm.getFavoredNodes(regionB)); + + admin.mergeRegionsAsync(regionA.getEncodedNameAsBytes(), + regionB.getEncodedNameAsBytes(), false); + + TEST_UTIL.waitUntilNoRegionsInTransition(WAIT_TIMEOUT); + TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate() { + @Override + public boolean evaluate() throws Exception { + return admin.getTableRegions(tableName).size() == numRegions - 1; + } + }); + TEST_UTIL.waitUntilAllRegionsAssigned(tableName); + + // All regions should have favored nodes + checkIfAllRegionsHaveFavoredNodes(tableName); + + HRegionInfo mergedRegion = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo(); + List mergedFN = fnm.getFavoredNodes(mergedRegion); + + assertArrayEquals("Merged region doesn't match regionA's FN", + regionAFN.toArray(), mergedFN.toArray()); + + TEST_UTIL.deleteTable(tableName); + } +} \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index 87fb169..b68b7fe 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.TableDescriptors; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.executor.ExecutorService; +import org.apache.hadoop.hbase.favored.FavoredNodesManager; import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.snapshot.SnapshotManager; @@ -348,6 +349,11 @@ public class MockNoopMasterServices implements MasterServices, Server { } @Override + public FavoredNodesManager getFavoredNodesManager() { + return null; + } + + @Override public SnapshotManager getSnapshotManager() { return null; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement2.java index 7b2f920..6aef9b3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement2.java @@ -72,6 +72,7 @@ public class TestRegionPlacement2 { public void testFavoredNodesPresentForRoundRobinAssignment() throws HBaseIOException { LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration()); balancer.setMasterServices(TEST_UTIL.getMiniHBaseCluster().getMaster()); + balancer.initialize(); List servers = new ArrayList(); for (int i = 0; i < SLAVES; i++) { ServerName server = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i).getServerName(); @@ -132,6 +133,7 @@ public class TestRegionPlacement2 { public void testFavoredNodesPresentForRandomAssignment() throws HBaseIOException { LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration()); balancer.setMasterServices(TEST_UTIL.getMiniHBaseCluster().getMaster()); + balancer.initialize(); List servers = new ArrayList(); for (int i = 0; i < SLAVES; i++) { ServerName server = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i).getServerName(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java index 522b072..5c55bb4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java @@ -18,17 +18,23 @@ package org.apache.hadoop.hbase.master.balancer; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; @@ -39,10 +45,12 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Triple; import org.junit.BeforeClass; import org.junit.Test; -import org.junit.Ignore; import org.junit.experimental.categories.Category; import org.mockito.Mockito; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; + @Category({MasterTests.class, SmallTests.class}) public class TestFavoredNodeAssignmentHelper { @@ -51,6 +59,9 @@ public class TestFavoredNodeAssignmentHelper { List>(); private static RackManager rackManager = Mockito.mock(RackManager.class); + // Some tests have randomness, so we run them multiple times + private static final int MAX_ATTEMPTS = 100; + @BeforeClass public static void setupBeforeClass() throws Exception { // Set up some server -> rack mappings @@ -99,7 +110,7 @@ public class TestFavoredNodeAssignmentHelper { return chosenServers; } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testSmallCluster() { // Test the case where we cannot assign favored nodes (because the number // of nodes in the cluster is too less) @@ -108,10 +119,11 @@ public class TestFavoredNodeAssignmentHelper { List servers = getServersFromRack(rackToServerCount); FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, new Configuration()); + helper.initialize(); assertFalse(helper.canPlaceFavoredNodes()); } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testPlacePrimaryRSAsRoundRobin() { // Test the regular case where there are many servers in different racks // Test once for few regions and once for many regions @@ -120,7 +132,7 @@ public class TestFavoredNodeAssignmentHelper { primaryRSPlacement(600, null, 10, 10, 10); } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testRoundRobinAssignmentsWithUnevenSizedRacks() { //In the case of uneven racks, the regions should be distributed //proportionately to the rack sizes @@ -137,7 +149,7 @@ public class TestFavoredNodeAssignmentHelper { primaryRSPlacement(459, null, 7, 9, 8); } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testSecondaryAndTertiaryPlacementWithSingleRack() { // Test the case where there is a single rack and we need to choose // Primary/Secondary/Tertiary from a single rack. @@ -155,13 +167,15 @@ public class TestFavoredNodeAssignmentHelper { // primary/secondary/tertiary for any given region for (HRegionInfo region : regions) { ServerName[] secondaryAndTertiaryServers = secondaryAndTertiaryMap.get(region); + assertNotNull(secondaryAndTertiaryServers); + assertTrue(primaryRSMap.containsKey(region)); assertTrue(!secondaryAndTertiaryServers[0].equals(primaryRSMap.get(region))); assertTrue(!secondaryAndTertiaryServers[1].equals(primaryRSMap.get(region))); assertTrue(!secondaryAndTertiaryServers[0].equals(secondaryAndTertiaryServers[1])); } } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testSecondaryAndTertiaryPlacementWithSingleServer() { // Test the case where we have a single node in the cluster. In this case // the primary can be assigned but the secondary/tertiary would be null @@ -179,7 +193,7 @@ public class TestFavoredNodeAssignmentHelper { assertTrue(secondaryAndTertiaryMap.get(regions.get(0)) == null); } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testSecondaryAndTertiaryPlacementWithMultipleRacks() { // Test the case where we have multiple racks and the region servers // belong to multiple racks @@ -203,12 +217,14 @@ public class TestFavoredNodeAssignmentHelper { String primaryRSRack = rackManager.getRack(primaryRSMap.get(entry.getKey())); String secondaryRSRack = rackManager.getRack(allServersForRegion[0]); String tertiaryRSRack = rackManager.getRack(allServersForRegion[1]); - assertTrue(!primaryRSRack.equals(secondaryRSRack)); - assertTrue(secondaryRSRack.equals(tertiaryRSRack)); + Set racks = Sets.newHashSet(primaryRSRack); + racks.add(secondaryRSRack); + racks.add(tertiaryRSRack); + assertTrue(racks.size() >= 2); } } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testSecondaryAndTertiaryPlacementWithLessThanTwoServersInRacks() { // Test the case where we have two racks but with less than two servers in each // We will not have enough machines to select secondary/tertiary @@ -229,7 +245,7 @@ public class TestFavoredNodeAssignmentHelper { } } - @Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test + @Test public void testSecondaryAndTertiaryPlacementWithMoreThanOneServerInPrimaryRack() { // Test the case where there is only one server in one rack and another rack // has more servers. We try to choose secondary/tertiary on different @@ -247,18 +263,15 @@ public class TestFavoredNodeAssignmentHelper { assertTrue(primaryRSMap.size() == 6); Map secondaryAndTertiaryMap = helper.placeSecondaryAndTertiaryRS(primaryRSMap); + assertTrue(secondaryAndTertiaryMap.size() == regions.size()); for (HRegionInfo region : regions) { ServerName s = primaryRSMap.get(region); ServerName secondaryRS = secondaryAndTertiaryMap.get(region)[0]; ServerName tertiaryRS = secondaryAndTertiaryMap.get(region)[1]; - if (rackManager.getRack(s).equals("rack1")) { - assertTrue(rackManager.getRack(secondaryRS).equals("rack2") && - rackManager.getRack(tertiaryRS).equals("rack1")); - } - if (rackManager.getRack(s).equals("rack2")) { - assertTrue(rackManager.getRack(secondaryRS).equals("rack1") && - rackManager.getRack(tertiaryRS).equals("rack1")); - } + Set racks = Sets.newHashSet(rackManager.getRack(s)); + racks.add(rackManager.getRack(secondaryRS)); + racks.add(rackManager.getRack(tertiaryRS)); + assertTrue(racks.size() >= 2); } } @@ -361,4 +374,277 @@ public class TestFavoredNodeAssignmentHelper { + " " + thirdRackSize + " " + regionsOnRack1 + " " + regionsOnRack2 + " " + regionsOnRack3; } -} + + @Test + public void testConstrainedPlacement() throws Exception { + List servers = Lists.newArrayList(); + servers.add(ServerName.valueOf("foo" + 1 + ":1234", -1)); + servers.add(ServerName.valueOf("foo" + 2 + ":1234", -1)); + servers.add(ServerName.valueOf("foo" + 15 + ":1234", -1)); + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + List regions = new ArrayList(20); + for (int i = 0; i < 20; i++) { + HRegionInfo region = new HRegionInfo(TableName.valueOf("foobar"), + Bytes.toBytes(i), Bytes.toBytes(i + 1)); + regions.add(region); + } + Map> assignmentMap = + new HashMap>(); + Map primaryRSMap = new HashMap(); + helper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions); + assertTrue(primaryRSMap.size() == regions.size()); + Map secondaryAndTertiary = + helper.placeSecondaryAndTertiaryRS(primaryRSMap); + assertEquals(regions.size(), secondaryAndTertiary.size()); + } + + @Test + public void testGetOneRandomRack() throws IOException { + + Map rackToServerCount = new HashMap<>(); + List rackList = Lists.newArrayList("rack1", "rack2", "rack3"); + for (String rack : rackList) { + rackToServerCount.put(rack, 2); + } + List servers = getServersFromRack(rackToServerCount); + + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + // Check we don't get a bad rack on any number of attempts + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + assertTrue(rackList.contains(helper.getOneRandomRack(Sets.newHashSet()))); + } + + // Check skipRack multiple times when an invalid rack is specified + Set skipRacks = Sets.newHashSet("rack"); + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + assertTrue(rackList.contains(helper.getOneRandomRack(skipRacks))); + } + + // Check skipRack multiple times when an valid rack is specified + skipRacks = Sets.newHashSet("rack1"); + List validRacks = Lists.newArrayList("rack2", "rack3"); + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + assertTrue(validRacks.contains(helper.getOneRandomRack(skipRacks))); + } + } + + @Test + public void testGetRandomServerSingleRack() throws IOException { + + Map rackToServerCount = new HashMap<>(); + final String rack = "rack1"; + rackToServerCount.put(rack, 4); + List servers = getServersFromRack(rackToServerCount); + + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + // Check we don't get a bad node on any number of attempts + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + ServerName sn = helper.getOneRandomServer(rack, Sets.newHashSet()); + assertTrue("Server:" + sn + " does not belong to list: " + servers, servers.contains(sn)); + } + + // Check skipServers multiple times when an invalid server is specified + Set skipServers = + Sets.newHashSet(ServerName.valueOf("invalidnode:1234", ServerName.NON_STARTCODE)); + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + ServerName sn = helper.getOneRandomServer(rack, skipServers); + assertTrue("Server:" + sn + " does not belong to list: " + servers, servers.contains(sn)); + } + + // Check skipRack multiple times when an valid servers are specified + ServerName skipSN = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE); + skipServers = Sets.newHashSet(skipSN); + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + ServerName sn = helper.getOneRandomServer(rack, skipServers); + assertNotEquals("Skip server should not be selected ", + skipSN.getHostAndPort(), sn.getHostAndPort()); + assertTrue("Server:" + sn + " does not belong to list: " + servers, servers.contains(sn)); + } + } + + @Test + public void testGetRandomServerMultiRack() throws IOException { + Map rackToServerCount = new HashMap<>(); + List rackList = Lists.newArrayList("rack1", "rack2", "rack3"); + for (String rack : rackList) { + rackToServerCount.put(rack, 4); + } + List servers = getServersFromRack(rackToServerCount); + + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + // Check we don't get a bad node on any number of attempts + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + for (String rack : rackList) { + ServerName sn = helper.getOneRandomServer(rack, Sets.newHashSet()); + assertTrue("Server:" + sn + " does not belong to rack servers: " + rackToServers.get(rack), + rackToServers.get(rack).contains(sn)); + } + } + + // Check skipServers multiple times when an invalid server is specified + Set skipServers = + Sets.newHashSet(ServerName.valueOf("invalidnode:1234", ServerName.NON_STARTCODE)); + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + for (String rack : rackList) { + ServerName sn = helper.getOneRandomServer(rack, skipServers); + assertTrue("Server:" + sn + " does not belong to rack servers: " + rackToServers.get(rack), + rackToServers.get(rack).contains(sn)); + } + } + + // Check skipRack multiple times when an valid servers are specified + ServerName skipSN1 = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE); + ServerName skipSN2 = ServerName.valueOf("foo10:1234", ServerName.NON_STARTCODE); + ServerName skipSN3 = ServerName.valueOf("foo20:1234", ServerName.NON_STARTCODE); + skipServers = Sets.newHashSet(skipSN1, skipSN2, skipSN3); + for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) { + for (String rack : rackList) { + ServerName sn = helper.getOneRandomServer(rack, skipServers); + assertFalse("Skip server should not be selected ", skipServers.contains(sn)); + assertTrue("Server:" + sn + " does not belong to rack servers: " + rackToServers.get(rack), + rackToServers.get(rack).contains(sn)); + } + } + } + + @Test + public void testGetFavoredNodes() throws IOException { + Map rackToServerCount = new HashMap<>(); + List rackList = Lists.newArrayList("rack1", "rack2", "rack3"); + for (String rack : rackList) { + rackToServerCount.put(rack, 4); + } + List servers = getServersFromRack(rackToServerCount); + + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + HRegionInfo region = new HRegionInfo(TableName.valueOf("foobar"), + HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW); + + for (int maxattempts = 0; maxattempts < MAX_ATTEMPTS; maxattempts++) { + List fn = helper.generateFavoredNodes(region); + checkDuplicateFN(fn); + checkFNRacks(fn); + } + } + + @Test + public void testGenMissingFavoredNodeOneRack() throws IOException { + Map rackToServerCount = new HashMap<>(); + final String rack = "rack1"; + rackToServerCount.put(rack, 6); + List servers = getServersFromRack(rackToServerCount); + + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + + ServerName snRack1SN1 = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE); + ServerName snRack1SN2 = ServerName.valueOf("foo2:1234", ServerName.NON_STARTCODE); + ServerName snRack1SN3 = ServerName.valueOf("foo3:1234", ServerName.NON_STARTCODE); + + List fn = Lists.newArrayList(snRack1SN1, snRack1SN2); + for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) { + checkDuplicateFN(fn, helper.generateMissingFavoredNode(fn)); + } + + fn = Lists.newArrayList(snRack1SN1, snRack1SN2); + List skipServers = Lists.newArrayList(snRack1SN3); + for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) { + ServerName genSN = helper.generateMissingFavoredNode(fn, skipServers); + checkDuplicateFN(fn, genSN); + assertNotEquals("Generated FN should not match excluded one", snRack1SN3, genSN); + } + } + + @Test + public void testGenMissingFavoredNodeMultiRack() throws IOException { + + ServerName snRack1SN1 = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE); + ServerName snRack1SN2 = ServerName.valueOf("foo2:1234", ServerName.NON_STARTCODE); + ServerName snRack2SN1 = ServerName.valueOf("foo10:1234", ServerName.NON_STARTCODE); + ServerName snRack2SN2 = ServerName.valueOf("foo11:1234", ServerName.NON_STARTCODE); + + Map rackToServerCount = new HashMap<>(); + List rackList = Lists.newArrayList("rack1", "rack2"); + for (String rack : rackList) { + rackToServerCount.put(rack, 4); + } + List servers = getServersFromRack(rackToServerCount); + + FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager); + helper.initialize(); + assertTrue(helper.canPlaceFavoredNodes()); + + List fn = Lists.newArrayList(snRack1SN1, snRack1SN2); + for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) { + ServerName genSN = helper.generateMissingFavoredNode(fn); + checkDuplicateFN(fn, genSN); + checkFNRacks(fn, genSN); + } + + fn = Lists.newArrayList(snRack1SN1, snRack2SN1); + for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) { + ServerName genSN = helper.generateMissingFavoredNode(fn); + checkDuplicateFN(fn, genSN); + checkFNRacks(fn, genSN); + } + + fn = Lists.newArrayList(snRack1SN1, snRack2SN1); + List skipServers = Lists.newArrayList(snRack2SN2); + for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) { + ServerName genSN = helper.generateMissingFavoredNode(fn, skipServers); + checkDuplicateFN(fn, genSN); + checkFNRacks(fn, genSN); + assertNotEquals("Generated FN should not match excluded one", snRack2SN2, genSN); + } + } + + private void checkDuplicateFN(List fnList, ServerName genFN) { + Set favoredNodes = Sets.newHashSet(fnList); + assertNotNull("Generated FN can't be null", genFN); + favoredNodes.add(genFN); + assertEquals("Did not find expected number of favored nodes", + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, favoredNodes.size()); + } + + private void checkDuplicateFN(List fnList) { + Set favoredNodes = Sets.newHashSet(fnList); + assertEquals("Did not find expected number of favored nodes", + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, favoredNodes.size()); + } + + private void checkFNRacks(List fnList, ServerName genFN) { + Set favoredNodes = Sets.newHashSet(fnList); + favoredNodes.add(genFN); + Set racks = Sets.newHashSet(); + for (ServerName sn : favoredNodes) { + racks.add(rackManager.getRack(sn)); + } + assertTrue("FN should be spread atleast across 2 racks", racks.size() >= 2); + } + + private void checkFNRacks(List fnList) { + Set favoredNodes = Sets.newHashSet(fnList); + Set racks = Sets.newHashSet(); + for (ServerName sn : favoredNodes) { + racks.add(rackManager.getRack(sn)); + } + assertTrue("FN should be spread atleast across 2 racks", racks.size() >= 2); + } +} \ No newline at end of file -- 2.10.1