From 62f91f265f8789a772ad7e029354ad94451a8b3a Mon Sep 17 00:00:00 2001 From: zhangduo Date: Thu, 2 Aug 2018 13:08:07 -0700 Subject: [PATCH] HBASE-20996 Backport to branch-2.0 HBASE-20722 "Make RegionServerTracker only depend on children changed event" --- .../hadoop/hbase/client/VersionInfoUtil.java | 2 +- .../org/apache/hadoop/hbase/master/HMaster.java | 56 +++--- .../hadoop/hbase/master/RegionServerTracker.java | 213 +++++++++++---------- .../apache/hadoop/hbase/master/ServerManager.java | 17 +- .../hbase/master/TestAssignmentListener.java | 98 +--------- .../hbase/master/TestClockSkewDetection.java | 2 +- .../hadoop/hbase/master/TestMasterNoCluster.java | 18 +- .../hbase/master/TestShutdownBackupMaster.java | 2 +- 8 files changed, 152 insertions(+), 256 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/VersionInfoUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/VersionInfoUtil.java index 95984de184..cde59ebc6f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/VersionInfoUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/VersionInfoUtil.java @@ -102,7 +102,7 @@ public final class VersionInfoUtil { * @param versionInfo the VersionInfo object to pack * @return the version number as int. (e.g. 0x0103004 is 1.3.4) */ - private static int getVersionNumber(final HBaseProtos.VersionInfo versionInfo) { + public static int getVersionNumber(final HBaseProtos.VersionInfo versionInfo) { if (versionInfo != null) { try { final String[] components = getVersionComponents(versionInfo); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 7ad77652db..a5b9b8819b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -70,7 +70,6 @@ import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.PleaseHoldException; -import org.apache.hadoop.hbase.ServerMetricsBuilder; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableDescriptors; import org.apache.hadoop.hbase.TableName; @@ -85,6 +84,7 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.client.TableState; +import org.apache.hadoop.hbase.client.VersionInfoUtil; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.exceptions.MergeRegionException; @@ -200,7 +200,6 @@ import org.apache.hbase.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; -import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionServerInfo; import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.Quotas; import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.SpaceViolationPolicy; import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; @@ -285,7 +284,7 @@ public class HMaster extends HRegionServer implements MasterServices { // Manager and zk listener for master election private final ActiveMasterManager activeMasterManager; // Region server tracker - RegionServerTracker regionServerTracker; + private RegionServerTracker regionServerTracker; // Draining region server tracker private DrainingServerTracker drainingServerTracker; // Tracker for load balancer state @@ -710,9 +709,11 @@ public class HMaster extends HRegionServer implements MasterServices { /** * Initialize all ZK based system trackers. + * Will be overridden in tests. */ - void initializeZKBasedSystemTrackers() throws IOException, - InterruptedException, KeeperException { + @VisibleForTesting + protected void initializeZKBasedSystemTrackers() + throws IOException, InterruptedException, KeeperException { this.balancer = LoadBalancerFactory.getLoadBalancer(conf); this.normalizer = RegionNormalizerFactory.getRegionNormalizer(conf); this.normalizer.setMasterServices(this); @@ -1038,9 +1039,15 @@ public class HMaster extends HRegionServer implements MasterServices { } /** + *

* Create a {@link ServerManager} instance. + *

+ *

+ * Will be overridden in tests. + *

*/ - ServerManager createServerManager(final MasterServices master) throws IOException { + @VisibleForTesting + protected ServerManager createServerManager(final MasterServices master) throws IOException { // We put this out here in a method so can do a Mockito.spy and stub it out // w/ a mocked up ServerManager. setupClusterConnection(); @@ -1050,17 +1057,11 @@ public class HMaster extends HRegionServer implements MasterServices { private void waitForRegionServers(final MonitoredTask status) throws IOException, InterruptedException { this.serverManager.waitForRegionServers(status); - // Check zk for region servers that are up but didn't register - for (ServerName sn: this.regionServerTracker.getOnlineServers()) { - // The isServerOnline check is opportunistic, correctness is handled inside - if (!this.serverManager.isServerOnline(sn) && - serverManager.checkAndRecordNewServer(sn, ServerMetricsBuilder.of(sn))) { - LOG.info("Registered server found up in zk but who has not yet reported in: " + sn); - } - } } - void initClusterSchemaService() throws IOException, InterruptedException { + // Will be overridden in tests + @VisibleForTesting + protected void initClusterSchemaService() throws IOException, InterruptedException { this.clusterSchemaService = new ClusterSchemaServiceImpl(this); this.clusterSchemaService.startAsync(); try { @@ -1072,14 +1073,14 @@ public class HMaster extends HRegionServer implements MasterServices { } } - void initQuotaManager() throws IOException { + private void initQuotaManager() throws IOException { MasterQuotaManager quotaManager = new MasterQuotaManager(this); this.assignmentManager.setRegionStateListener(quotaManager); quotaManager.start(); this.quotaManager = quotaManager; } - SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() { + private SpaceQuotaSnapshotNotifier createQuotaSnapshotNotifier() { SpaceQuotaSnapshotNotifier notifier = SpaceQuotaSnapshotNotifierFactory.getInstance().create(getConfiguration()); return notifier; @@ -1253,6 +1254,9 @@ public class HMaster extends HRegionServer implements MasterServices { procedureStore.stop(isAborted()); procedureStore = null; } + if (this.regionServerTracker != null) { + this.regionServerTracker.stop(); + } } private void stopChores() { @@ -2616,21 +2620,17 @@ public class HMaster extends HRegionServer implements MasterServices { } public int getRegionServerInfoPort(final ServerName sn) { - RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn); - if (info == null || info.getInfoPort() == 0) { - return conf.getInt(HConstants.REGIONSERVER_INFO_PORT, - HConstants.DEFAULT_REGIONSERVER_INFOPORT); - } - return info.getInfoPort(); + int port = this.serverManager.getInfoPort(sn); + return port == 0 ? conf.getInt(HConstants.REGIONSERVER_INFO_PORT, + HConstants.DEFAULT_REGIONSERVER_INFOPORT) : port; } @Override public String getRegionServerVersion(final ServerName sn) { - RegionServerInfo info = this.regionServerTracker.getRegionServerInfo(sn); - if (info != null && info.hasVersionInfo()) { - return info.getVersionInfo().getVersion(); - } - return "0.0.0"; //Lowest version to prevent move system region to unknown version RS. + // Will return 0 if the server is not online to prevent move system region to unknown version + // RS. + int versionNumber = this.serverManager.getServerVersion(sn); + return VersionInfoUtil.versionNumberToString(versionNumber); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java index 81fc589f43..51371c9cb4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java @@ -1,5 +1,4 @@ /** - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,155 +19,161 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; import java.io.InterruptedIOException; -import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; -import java.util.NavigableMap; -import java.util.TreeMap; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; +import org.apache.hadoop.hbase.ServerMetrics; +import org.apache.hadoop.hbase.ServerMetricsBuilder; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.VersionInfoUtil; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.ZKListener; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZKWatcher; import org.apache.hadoop.hbase.zookeeper.ZNodePaths; -import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; -import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionServerInfo; import org.apache.yetus.audience.InterfaceAudience; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionServerInfo; + /** * Tracks the online region servers via ZK. * - *

Handling of new RSs checking in is done via RPC. This class - * is only responsible for watching for expired nodes. It handles - * listening for changes in the RS node list and watching each node. - * - *

If an RS node gets deleted, this automatically handles calling of + * Handling of new RSs checking in is done via RPC. This class is only responsible for watching for + * expired nodes. It handles listening for changes in the RS node list. The only exception is when + * master restart, we will use the list fetched from zk to construct the initial set of live region + * servers. + *

+ * If an RS node gets deleted, this automatically handles calling of * {@link ServerManager#expireServer(ServerName)} */ @InterfaceAudience.Private public class RegionServerTracker extends ZKListener { private static final Logger LOG = LoggerFactory.getLogger(RegionServerTracker.class); - private final NavigableMap regionServers = new TreeMap<>(); - private ServerManager serverManager; - private MasterServices server; + private final Set regionServers = new HashSet<>(); + private final ServerManager serverManager; + private final MasterServices server; + // As we need to send request to zk when processing the nodeChildrenChanged event, we'd better + // move the operation to a single threaded thread pool in order to not block the zk event + // processing since all the zk listener across HMaster will be called in one thread sequentially. + private final ExecutorService executor; - public RegionServerTracker(ZKWatcher watcher, - MasterServices server, ServerManager serverManager) { + public RegionServerTracker(ZKWatcher watcher, MasterServices server, + ServerManager serverManager) { super(watcher); this.server = server; this.serverManager = serverManager; + executor = Executors.newSingleThreadExecutor( + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("RegionServerTracker-%d").build()); + } + + private Pair getServerInfo(String name) + throws KeeperException, IOException { + ServerName serverName = ServerName.parseServerName(name); + String nodePath = ZNodePaths.joinZNode(watcher.getZNodePaths().rsZNode, name); + byte[] data; + try { + data = ZKUtil.getData(watcher, nodePath); + } catch (InterruptedException e) { + throw (InterruptedIOException) new InterruptedIOException().initCause(e); + } + if (data == null) { + // we should receive a children changed event later and then we will expire it, so we still + // need to add it to the region server set. + LOG.warn("Server node {} does not exist, already dead?", name); + return Pair.newPair(serverName, null); + } + if (data.length == 0 || !ProtobufUtil.isPBMagicPrefix(data)) { + // this should not happen actually, unless we have bugs or someone has messed zk up. + LOG.warn("Invalid data for region server node {} on zookeeper, data length = {}", name, + data.length); + return Pair.newPair(serverName, null); + } + RegionServerInfo.Builder builder = RegionServerInfo.newBuilder(); + int magicLen = ProtobufUtil.lengthOfPBMagic(); + ProtobufUtil.mergeFrom(builder, data, magicLen, data.length - magicLen); + return Pair.newPair(serverName, builder.build()); } /** * Starts the tracking of online RegionServers. * - *

All RSs will be tracked after this method is called. - * - * @throws KeeperException - * @throws IOException + * All RSs will be tracked after this method is called. */ public void start() throws KeeperException, IOException { watcher.registerListener(this); - List servers = - ZKUtil.listChildrenAndWatchThem(watcher, watcher.znodePaths.rsZNode); - refresh(servers); - } - - private void refresh(final List servers) throws IOException { - synchronized(this.regionServers) { - this.regionServers.clear(); - for (String n: servers) { - ServerName sn = ServerName.parseServerName(ZKUtil.getNodeName(n)); - if (regionServers.get(sn) == null) { - RegionServerInfo.Builder rsInfoBuilder = RegionServerInfo.newBuilder(); - try { - String nodePath = ZNodePaths.joinZNode(watcher.znodePaths.rsZNode, n); - byte[] data = ZKUtil.getData(watcher, nodePath); - if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) { - int magicLen = ProtobufUtil.lengthOfPBMagic(); - ProtobufUtil.mergeFrom(rsInfoBuilder, data, magicLen, data.length - magicLen); - } - if (LOG.isTraceEnabled()) { - LOG.trace("Added tracking of RS " + nodePath); - } - } catch (KeeperException e) { - LOG.warn("Get Rs info port from ephemeral node", e); - } catch (IOException e) { - LOG.warn("Illegal data from ephemeral node", e); - } catch (InterruptedException e) { - throw new InterruptedIOException(); - } - this.regionServers.put(sn, rsInfoBuilder.build()); - } + synchronized (this) { + List servers = + ZKUtil.listChildrenAndWatchForNewChildren(watcher, watcher.getZNodePaths().rsZNode); + for (String n : servers) { + Pair pair = getServerInfo(n); + ServerName serverName = pair.getFirst(); + RegionServerInfo info = pair.getSecond(); + regionServers.add(serverName); + ServerMetrics serverMetrics = info != null + ? ServerMetricsBuilder.of(serverName, + VersionInfoUtil.getVersionNumber(info.getVersionInfo())) + : ServerMetricsBuilder.of(serverName); + serverManager.checkAndRecordNewServer(serverName, serverMetrics); } } } - private void remove(final ServerName sn) { - synchronized(this.regionServers) { - this.regionServers.remove(sn); - } + public void stop() { + executor.shutdownNow(); } - @Override - public void nodeCreated(String path) { - if (path.startsWith(watcher.znodePaths.rsZNode)) { - String serverName = ZKUtil.getNodeName(path); - LOG.info("RegionServer ephemeral node created, adding [" + serverName + "]"); - if (server.isInitialized()) { - // Only call the check to move servers if a RegionServer was added to the cluster; in this - // case it could be a server with a new version so it makes sense to run the check. - server.checkIfShouldMoveSystemRegionAsync(); + private synchronized void refresh() { + List names; + try { + names = ZKUtil.listChildrenAndWatchForNewChildren(watcher, watcher.getZNodePaths().rsZNode); + } catch (KeeperException e) { + // here we need to abort as we failed to set watcher on the rs node which means that we can + // not track the node deleted evetnt any more. + server.abort("Unexpected zk exception getting RS nodes", e); + return; + } + Set servers = + names.stream().map(ServerName::parseServerName).collect(Collectors.toSet()); + for (Iterator iter = regionServers.iterator(); iter.hasNext();) { + ServerName sn = iter.next(); + if (!servers.contains(sn)) { + LOG.info("RegionServer ephemeral node deleted, processing expiration [{}]", sn); + serverManager.expireServer(sn); + iter.remove(); } } - } - - @Override - public void nodeDeleted(String path) { - if (path.startsWith(watcher.znodePaths.rsZNode)) { - String serverName = ZKUtil.getNodeName(path); - LOG.info("RegionServer ephemeral node deleted, processing expiration [" + - serverName + "]"); - ServerName sn = ServerName.parseServerName(serverName); - if (!serverManager.isServerOnline(sn)) { - LOG.warn(serverName.toString() + " is not online or isn't known to the master."+ - "The latter could be caused by a DNS misconfiguration."); - return; + // here we do not need to parse the region server info as it is useless now, we only need the + // server name. + boolean newServerAdded = false; + for (ServerName sn : servers) { + if (regionServers.add(sn)) { + newServerAdded = true; + LOG.info("RegionServer ephemeral node created, adding [" + sn + "]"); } - remove(sn); - this.serverManager.expireServer(sn); + } + if (newServerAdded && server.isInitialized()) { + // Only call the check to move servers if a RegionServer was added to the cluster; in this + // case it could be a server with a new version so it makes sense to run the check. + server.checkIfShouldMoveSystemRegionAsync(); } } @Override public void nodeChildrenChanged(String path) { - if (path.equals(watcher.znodePaths.rsZNode) - && !server.isAborted() && !server.isStopped()) { - try { - List servers = - ZKUtil.listChildrenAndWatchThem(watcher, watcher.znodePaths.rsZNode); - refresh(servers); - } catch (IOException e) { - server.abort("Unexpected zk exception getting RS nodes", e); - } catch (KeeperException e) { - server.abort("Unexpected zk exception getting RS nodes", e); - } - } - } - - public RegionServerInfo getRegionServerInfo(final ServerName sn) { - return regionServers.get(sn); - } - - /** - * Gets the online servers. - * @return list of online servers - */ - public List getOnlineServers() { - synchronized (this.regionServers) { - return new ArrayList<>(this.regionServers.keySet()); + if (path.equals(watcher.getZNodePaths().rsZNode) && !server.isAborted() && + !server.isStopped()) { + executor.execute(this::refresh); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index b6bc1a15f6..ebccf3c08f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -46,7 +46,6 @@ import org.apache.hadoop.hbase.ServerMetrics; import org.apache.hadoop.hbase.ServerMetricsBuilder; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.YouAreDeadException; -import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.ClusterConnection; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RetriesExhaustedException; @@ -187,19 +186,13 @@ public class ServerManager { /** * Constructor. - * @param master - * @throws ZooKeeperConnectionException */ public ServerManager(final MasterServices master) { - this(master, true); - } - - ServerManager(final MasterServices master, final boolean connect) { this.master = master; Configuration c = master.getConfiguration(); maxSkew = c.getLong("hbase.master.maxclockskew", 30000); warningSkew = c.getLong("hbase.master.warningclockskew", 10000); - this.connection = connect? master.getClusterConnection(): null; + this.connection = master.getClusterConnection(); this.rpcControllerFactory = this.connection == null? null: connection.getRpcControllerFactory(); } @@ -228,8 +221,7 @@ public class ServerManager { * @throws IOException */ ServerName regionServerStartup(RegionServerStartupRequest request, int versionNumber, - InetAddress ia) - throws IOException { + InetAddress ia) throws IOException { // Test for case where we get a region startup message from a regionserver // that has been quickly restarted but whose znode expiration handler has // not yet run, or from a server whose fail we are currently processing. @@ -1068,4 +1060,9 @@ public class ServerManager { ServerMetrics serverMetrics = onlineServers.get(serverName); return serverMetrics != null ? serverMetrics.getVersionNumber() : 0; } + + public int getInfoPort(ServerName serverName) { + ServerMetrics serverMetrics = onlineServers.get(serverName); + return serverMetrics != null ? serverMetrics.getInfoServerPort() : 0; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java index e8f739b4e7..1f22830432 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentListener.java @@ -20,18 +20,11 @@ package org.apache.hadoop.hbase.master; import static org.junit.Assert.assertEquals; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.ServerLoad; -import org.apache.hadoop.hbase.ServerMetricsBuilder; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; @@ -45,18 +38,13 @@ import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil; -import org.apache.hadoop.hbase.zookeeper.ZKUtil; -import org.apache.hadoop.hbase.zookeeper.ZKWatcher; -import org.apache.hadoop.hbase.zookeeper.ZNodePaths; import org.junit.AfterClass; -import org.junit.Assert; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; -import org.mockito.Mockito; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -74,16 +62,6 @@ public class TestAssignmentListener { @Rule public TestName name = new TestName(); - private static final Abortable abortable = new Abortable() { - @Override - public boolean isAborted() { - return false; - } - - @Override - public void abort(String why, Throwable e) { - } - }; static class DummyListener { protected AtomicInteger modified = new AtomicInteger(0); @@ -284,7 +262,7 @@ public class TestAssignmentListener { admin.mergeRegionsAsync(regions.get(0).getEncodedNameAsBytes(), regions.get(1).getEncodedNameAsBytes(), true); listener.awaitModifications(expectedModifications); - assertEquals(1, admin.getTableRegions(tableName).size()); + assertEquals(1, admin.getRegions(tableName).size()); assertEquals(expectedLoadCount, listener.getLoadCount()); // new merged region added assertEquals(expectedCloseCount, listener.getCloseCount()); // daughters removed @@ -313,78 +291,4 @@ public class TestAssignmentListener { } return serverCount == 1; } - - @Test - public void testAddNewServerThatExistsInDraining() throws Exception { - // Under certain circumstances, such as when we failover to the Backup - // HMaster, the DrainingServerTracker is started with existing servers in - // draining before all of the Region Servers register with the - // ServerManager as "online". This test is to ensure that Region Servers - // are properly added to the ServerManager.drainingServers when they - // register with the ServerManager under these circumstances. - Configuration conf = TEST_UTIL.getConfiguration(); - ZKWatcher zooKeeper = new ZKWatcher(conf, - "zkWatcher-NewServerDrainTest", abortable, true); - String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, - HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); - String drainingZNode = ZNodePaths.joinZNode(baseZNode, - conf.get("zookeeper.znode.draining.rs", "draining")); - - HMaster master = Mockito.mock(HMaster.class); - Mockito.when(master.getConfiguration()).thenReturn(conf); - - ServerName SERVERNAME_A = ServerName.valueOf("mockserverbulk_a.org", 1000, 8000); - ServerName SERVERNAME_B = ServerName.valueOf("mockserverbulk_b.org", 1001, 8000); - ServerName SERVERNAME_C = ServerName.valueOf("mockserverbulk_c.org", 1002, 8000); - - // We'll start with 2 servers in draining that existed before the - // HMaster started. - ArrayList drainingServers = new ArrayList<>(); - drainingServers.add(SERVERNAME_A); - drainingServers.add(SERVERNAME_B); - - // We'll have 2 servers that come online AFTER the DrainingServerTracker - // is started (just as we see when we failover to the Backup HMaster). - // One of these will already be a draining server. - HashMap onlineServers = new HashMap<>(); - onlineServers.put(SERVERNAME_A, new ServerLoad(ServerMetricsBuilder.of(SERVERNAME_A))); - onlineServers.put(SERVERNAME_C, new ServerLoad(ServerMetricsBuilder.of(SERVERNAME_C))); - - // Create draining znodes for the draining servers, which would have been - // performed when the previous HMaster was running. - for (ServerName sn : drainingServers) { - String znode = ZNodePaths.joinZNode(drainingZNode, sn.getServerName()); - ZKUtil.createAndFailSilent(zooKeeper, znode); - } - - // Now, we follow the same order of steps that the HMaster does to setup - // the ServerManager, RegionServerTracker, and DrainingServerTracker. - ServerManager serverManager = new ServerManager(master); - - RegionServerTracker regionServerTracker = new RegionServerTracker( - zooKeeper, master, serverManager); - regionServerTracker.start(); - - DrainingServerTracker drainingServerTracker = new DrainingServerTracker( - zooKeeper, master, serverManager); - drainingServerTracker.start(); - - // Confirm our ServerManager lists are empty. - Assert.assertEquals(new HashMap(), serverManager.getOnlineServers()); - Assert.assertEquals(new ArrayList(), serverManager.getDrainingServersList()); - - // checkAndRecordNewServer() is how servers are added to the ServerManager. - ArrayList onlineDrainingServers = new ArrayList<>(); - for (ServerName sn : onlineServers.keySet()){ - // Here's the actual test. - serverManager.checkAndRecordNewServer(sn, onlineServers.get(sn)); - if (drainingServers.contains(sn)){ - onlineDrainingServers.add(sn); // keeping track for later verification - } - } - - // Verify the ServerManager lists are correctly updated. - Assert.assertEquals(onlineServers, serverManager.getOnlineServers()); - Assert.assertEquals(onlineDrainingServers, serverManager.getDrainingServersList()); - } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java index c68f94dadf..877c3821d1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClockSkewDetection.java @@ -58,7 +58,7 @@ public class TestClockSkewDetection { when(conn.getRpcControllerFactory()).thenReturn(mock(RpcControllerFactory.class)); return conn; } - }, true); + }); LOG.debug("regionServerStartup 1"); InetAddress ia1 = InetAddress.getLocalHost(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java index 571f8e97f4..c85581f7c1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java @@ -23,8 +23,6 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -201,10 +199,10 @@ public class TestMasterNoCluster { } @Override - void initClusterSchemaService() throws IOException, InterruptedException {} + protected void initClusterSchemaService() throws IOException, InterruptedException {} @Override - ServerManager createServerManager(MasterServices master) throws IOException { + protected ServerManager createServerManager(MasterServices master) throws IOException { ServerManager sm = super.createServerManager(master); // Spy on the created servermanager ServerManager spy = Mockito.spy(sm); @@ -269,23 +267,15 @@ public class TestMasterNoCluster { } @Override - void initClusterSchemaService() throws IOException, InterruptedException {} + protected void initClusterSchemaService() throws IOException, InterruptedException {} @Override - void initializeZKBasedSystemTrackers() throws IOException, InterruptedException, + protected void initializeZKBasedSystemTrackers() throws IOException, InterruptedException, KeeperException { super.initializeZKBasedSystemTrackers(); // Record a newer server in server manager at first getServerManager().recordNewServerWithLock(newServer, new ServerLoad(ServerMetricsBuilder.of(newServer))); - - List onlineServers = new ArrayList<>(); - onlineServers.add(deadServer); - onlineServers.add(newServer); - // Mock the region server tracker to pull the dead server from zk - regionServerTracker = Mockito.spy(regionServerTracker); - Mockito.doReturn(onlineServers).when( - regionServerTracker).getOnlineServers(); } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java index 91108aeed5..9475d600a4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java @@ -60,7 +60,7 @@ public class TestShutdownBackupMaster { } @Override - void initClusterSchemaService() throws IOException, InterruptedException { + protected void initClusterSchemaService() throws IOException, InterruptedException { if (ARRIVE != null) { ARRIVE.countDown(); CONTINUE.await(); -- 2.16.3