From f5ff2e0aaf414c9e16f2312cd93ff586b0001d87 Mon Sep 17 00:00:00 2001 From: zhangduo Date: Sat, 20 Jan 2018 20:11:10 +0800 Subject: [PATCH] HBASE-19794 TestZooKeeper hangs Kill backup masters first, before the active master. Add some cleanup around NamespaceManager. Shorten the timeout waiting on namespace manager as a workaround until we have a better solution for interrupting ongoing client rpcs. Do it in general for all tests. --- hbase-common/src/main/resources/hbase-default.xml | 6 +++++ .../apache/hadoop/hbase/ZKNamespaceManager.java | 4 ++++ .../hbase/master/ClusterSchemaServiceImpl.java | 4 ++++ .../org/apache/hadoop/hbase/master/HMaster.java | 8 ++++--- .../hadoop/hbase/master/TableNamespaceManager.java | 27 +++++++++++++++++++++- .../org/apache/hadoop/hbase/MiniHBaseCluster.java | 16 ++++++++++--- .../hadoop/hbase/client/TestZKAsyncRegistry.java | 26 +++++++++++---------- 7 files changed, 72 insertions(+), 19 deletions(-) diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index bfae33ce99..26865deb1d 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -1802,4 +1802,10 @@ possible configurations would overwhelm and obscure the important. Number of rows in a batch operation above which a warning will be logged. + + hbase.master.wait.on.service.seconds + 30 + Default is 5 minutes. Make it 30 seconds for tests. See + HBASE-19794 for some context. 
+ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java index c6dc405291..53b2439824 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ZKNamespaceManager.java @@ -76,6 +76,10 @@ public class ZKNamespaceManager extends ZKListener { } } + public void stop() throws IOException { + this.watcher.unregisterListener(this); + } + public NamespaceDescriptor get(String name) { return cache.get(name); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java index 4527bc0055..4dd8de0152 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ClusterSchemaServiceImpl.java @@ -69,6 +69,10 @@ class ClusterSchemaServiceImpl extends AbstractService implements ClusterSchemaS protected void doStop() { // This is no stop for the table manager. 
notifyStopped(); + TableNamespaceManager tnsm = getTableNamespaceManager(); + if (tnsm != null) { + tnsm.stop("Stopping"); + } } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 0dd7f6217e..09b18bcaa5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -2692,9 +2692,11 @@ public class HMaster extends HRegionServer implements MasterServices { @Override public void stop(String msg) { - super.stop(msg); - if (this.activeMasterManager != null) { - this.activeMasterManager.stop(); + if (!isStopped()) { + super.stop(msg); + if (this.activeMasterManager != null) { + this.activeMasterManager.stop(); + } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java index 5de7dc548e..47b27f441f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableNamespaceManager.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.NamespaceDescriptor; +import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ZKNamespaceManager; import org.apache.hadoop.hbase.client.Delete; @@ -68,8 +69,9 @@ import org.slf4j.LoggerFactory; @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC", justification="TODO: synchronize access on nsTable but it is done in tiers above and this " + "class is going away/shrinking") -public class TableNamespaceManager { +public class TableNamespaceManager implements 
Stoppable { private static final Logger LOG = LoggerFactory.getLogger(TableNamespaceManager.class); + private volatile boolean stopped = false; private Configuration conf; private MasterServices masterServices; @@ -368,4 +370,27 @@ public class TableNamespaceManager { } return maxRegions; } + + @Override + public boolean isStopped() { + return this.stopped; + } + + @Override + public void stop(String why) { + if (this.stopped) { + return; + } + try { + this.zkNamespaceManager.stop(); + } catch (IOException ioe) { + LOG.warn("Failed NamespaceManager close", ioe); + } + try { + this.nsTable.close(); + } catch (IOException ioe) { + LOG.warn("Failed Namespace Table close", ioe); + } + this.stopped = true; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java index 3a40477fbb..378f6ecc72 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java @@ -837,12 +837,22 @@ public class MiniHBaseCluster extends HBaseCluster { * impossible to bring the mini-cluster back for clean shutdown. */ public void killAll() { + // Do backups first. + MasterThread activeMaster = null; + for (MasterThread masterThread : getMasterThreads()) { + if (!masterThread.getMaster().isActiveMaster()) { + masterThread.getMaster().abort("killAll"); + } else { + activeMaster = masterThread; + } + } + // Do active after. 
+ if (activeMaster != null) { + activeMaster.getMaster().abort("killAll"); + } for (RegionServerThread rst : getRegionServerThreads()) { rst.getRegionServer().abort("killAll"); } - for (MasterThread masterThread : getMasterThreads()) { - masterThread.getMaster().abort("killAll", new Throwable()); - } } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java index 347854916c..d4d3c085d9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestZKAsyncRegistry.java @@ -122,19 +122,21 @@ public class TestZKAsyncRegistry { @Test public void testIndependentZKConnections() throws IOException { - ReadOnlyZKClient zk1 = REGISTRY.getZKClient(); + try (ReadOnlyZKClient zk1 = REGISTRY.getZKClient()) { + Configuration otherConf = new Configuration(TEST_UTIL.getConfiguration()); + otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); + try (ZKAsyncRegistry otherRegistry = new ZKAsyncRegistry(otherConf)) { + ReadOnlyZKClient zk2 = otherRegistry.getZKClient(); - Configuration otherConf = new Configuration(TEST_UTIL.getConfiguration()); - otherConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); - try (ZKAsyncRegistry otherRegistry = new ZKAsyncRegistry(otherConf)) { - ReadOnlyZKClient zk2 = otherRegistry.getZKClient(); - - assertNotSame("Using a different configuration / quorum should result in different backing " + - "zk connection.", - zk1, zk2); - assertNotEquals( - "Using a different configrution / quorum should be reflected in the " + "zk connection.", - zk1.getConnectString(), zk2.getConnectString()); + assertNotSame("Using a different configuration / quorum should result in different backing " + + "zk connection.", + zk1, zk2); + assertNotEquals( + "Using a different configrution / quorum should be reflected in the " + "zk 
connection.", + zk1.getConnectString(), zk2.getConnectString()); + } + } finally { + LOG.info("DONE!"); } } } -- 2.11.0 (Apple Git-81)