From 4557068fad251c99eb41a400318453e8cc8f628f Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 10 Jan 2018 14:25:39 -0800 Subject: [PATCH] HBASE-19753 Miscellany of fixes for hbase-zookeeper tests to make them more robust First, we add test resources to CLASSPATH when tests run. W/o it, there was no logging of hbase-zookeeper test output (not sure why I have to add this here and not over in hbase-server; research turns up nothing so far). M hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKMainServer.java Improve fail log message. M hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestReadOnlyZKClient.java M hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKNodeTracker.java Wait until ZK is connected before progressing. On my slow zk, it could be a while post construction before zk connected. Using an unconnected zk caused test to fail. Later in this test we kill a session... so makes sense that when we try to use the old zk, we could get a ConnectionLoss. Catch it. Allow some time for transition to new zk to happen before calling asserts. M hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKMainServer.java Change session timeout to default 30s from 1s which was way too short. M hbase-zookeeper/src/test/resources/log4j.properties Set zk logs to DEBUG level in this module at least. --- hbase-zookeeper/pom.xml | 3 +++ .../hadoop/hbase/zookeeper/ZKMainServer.java | 9 ++++++--- .../hbase/zookeeper/TestReadOnlyZKClient.java | 23 ++++++++++++++++++---- .../hadoop/hbase/zookeeper/TestZKMainServer.java | 3 ++- .../hadoop/hbase/zookeeper/TestZKNodeTracker.java | 4 ++++ .../src/test/resources/log4j.properties | 2 +- 6 files changed, 35 insertions(+), 9 deletions(-) diff --git a/hbase-zookeeper/pom.xml b/hbase-zookeeper/pom.xml index 67d373013f..aff824bcb0 100644 --- a/hbase-zookeeper/pom.xml +++ b/hbase-zookeeper/pom.xml @@ -96,6 +96,9 @@ maven-surefire-plugin + + src/test/resources + listener diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKMainServer.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKMainServer.java index 3a96015ffa..80a2f7e973 100644 --- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKMainServer.java +++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKMainServer.java @@ -53,12 +53,15 @@ public class ZKMainServer { // Make sure we are connected before we proceed. Can take a while on some systems. If we // run the command without being connected, we get ConnectionLoss KeeperErrorConnection... Stopwatch stopWatch = Stopwatch.createStarted(); + // Make it 30seconds. We dont' have a config in this context and zk doesn't have + // a timeout until after connection. 30000ms is default for zk. + long timeout = 30000; while (!this.zk.getState().isConnected()) { Thread.sleep(1); - if (stopWatch.elapsed(TimeUnit.SECONDS) > 10) { + if (stopWatch.elapsed(TimeUnit.MILLISECONDS) > timeout) { throw new InterruptedException("Failed connect after waiting " + - stopWatch.elapsed(TimeUnit.SECONDS) + "seconds; state=" + this.zk.getState() + - "; " + this.zk); + stopWatch.elapsed(TimeUnit.MILLISECONDS) + "ms (zk session timeout); state=" + + this.zk.getState() + "; " + this.zk); } } } diff --git a/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestReadOnlyZKClient.java b/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestReadOnlyZKClient.java index 1f83536393..18bcbf0727 100644 --- a/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestReadOnlyZKClient.java +++ b/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestReadOnlyZKClient.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.Waiter.ExplainingPredicate; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.ZKTests; +import org.apache.hadoop.hbase.util.Threads; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.Code; @@ -67,8 +68,11 @@ public class TestReadOnlyZKClient { public static void setUp() throws Exception { PORT = UTIL.startMiniZKCluster().getClientPort(); - ZooKeeper zk = new ZooKeeper("localhost:" + PORT, 10000, e -> { - }); + ZooKeeper zk = new ZooKeeper("localhost:" + PORT, 10000, e -> {}); + // Can take a while to get connected. Progressing w/o being connected will have us fail. + while (!zk.getState().isConnected()) { + Threads.sleep(10); + } DATA = new byte[10]; ThreadLocalRandom.current().nextBytes(DATA); zk.create(PATH, DATA, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); @@ -137,10 +141,21 @@ public class TestReadOnlyZKClient { UTIL.getZkCluster().getZooKeeperServers().get(0).closeSession(sessionId); // should not reach keep alive so still the same instance assertSame(zk, RO_ZK.getZooKeeper()); - - assertArrayEquals(DATA, RO_ZK.get(PATH).get()); + try { + byte [] got = RO_ZK.get(PATH).get(); + assertArrayEquals(DATA, got); + } catch (ExecutionException ee) { + // We killed session above. Can get a ZK ConnectionLoss here. + System.out.println(ee); + } + while (RO_ZK.getZooKeeper() == null || RO_ZK.getZooKeeper() == zk) { + Threads.sleep(10); + } assertNotNull(RO_ZK.getZooKeeper()); assertNotSame(zk, RO_ZK.getZooKeeper()); + while (!RO_ZK.getZooKeeper().getState().isConnected()) { + Threads.sleep(10); + } assertNotEquals(sessionId, RO_ZK.getZooKeeper().getSessionId()); } } diff --git a/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKMainServer.java b/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKMainServer.java index bc1c240e59..d8db8aedac 100644 --- a/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKMainServer.java +++ b/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKMainServer.java @@ -70,7 +70,8 @@ public class TestZKMainServer { public void testCommandLineWorks() throws Exception { System.setSecurityManager(new NoExitSecurityManager()); HBaseZKTestingUtility htu = new HBaseZKTestingUtility(); - htu.getConfiguration().setInt(HConstants.ZK_SESSION_TIMEOUT, 1000); + // Make it long so for sure succeeds. + htu.getConfiguration().setInt(HConstants.ZK_SESSION_TIMEOUT, 30000); htu.startMiniZKCluster(); try { ZKWatcher zkw = htu.getZooKeeperWatcher(); diff --git a/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKNodeTracker.java b/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKNodeTracker.java index 3778ca0119..f1c0cb907f 100644 --- a/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKNodeTracker.java +++ b/hbase-zookeeper/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKNodeTracker.java @@ -132,6 +132,10 @@ public class TestZKNodeTracker { final ZooKeeper zkconn = new ZooKeeper(ZKConfig.getZKQuorumServersString(TEST_UTIL.getConfiguration()), 60000, e -> { }); + // Can take a while to get connected. Progressing w/o being connected will have us fail. + while (!zkconn.getState().isConnected()) { + Threads.sleep(10); + } // Add the node with data one zkconn.create(node, dataOne, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); diff --git a/hbase-zookeeper/src/test/resources/log4j.properties b/hbase-zookeeper/src/test/resources/log4j.properties index c322699ced..f599ea636e 100644 --- a/hbase-zookeeper/src/test/resources/log4j.properties +++ b/hbase-zookeeper/src/test/resources/log4j.properties @@ -55,7 +55,7 @@ log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %C{2}(%L): #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG log4j.logger.org.apache.hadoop=WARN -log4j.logger.org.apache.zookeeper=ERROR +log4j.logger.org.apache.zookeeper=DEBUG log4j.logger.org.apache.hadoop.hbase=DEBUG #These settings are workarounds against spurious logs from the minicluster. -- 2.11.0 (Apple Git-81)