diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java index 15ac971..e4ab528 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java @@ -104,6 +104,8 @@ private String zkHostPort = null; private int zkSessionTimeout; + // wait time for zkClient to re-establish connection with zk-server. + private long zkResyncWaitTime; @VisibleForTesting long zkRetryInterval; @@ -234,6 +236,7 @@ public synchronized void initInternal(Configuration conf) throws Exception { conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS, YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS); } + zkResyncWaitTime = zkRetryInterval * numRetries; zkAcl = RMZKUtils.getZKAcls(conf); zkAuths = RMZKUtils.getZKAuths(conf); @@ -1090,11 +1093,11 @@ T runWithCheck() throws Exception { long startTime = System.currentTimeMillis(); synchronized (ZKRMStateStore.this) { while (zkClient == null) { - ZKRMStateStore.this.wait(zkSessionTimeout); + ZKRMStateStore.this.wait(zkResyncWaitTime); if (zkClient != null) { break; } - if (System.currentTimeMillis() - startTime > zkSessionTimeout) { + if (System.currentTimeMillis() - startTime > zkResyncWaitTime) { throw new IOException("Wait for ZKClient creation timed out"); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java index 62dc5ef..0b9eff6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java @@ -170,10 +170,10 @@ public void testZKClientDisconnectAndReconnect() throws Exception { TestZKClient zkClientTester = new TestZKClient(); - String path = "/test"; + final String path = "/test"; YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, ZK_TIMEOUT_MS); - ZKRMStateStore store = + final ZKRMStateStore store = (ZKRMStateStore) zkClientTester.getRMStateStore(conf); TestDispatcher dispatcher = new TestDispatcher(); store.setRMDispatcher(dispatcher); @@ -185,14 +185,20 @@ public void testZKClientDisconnectAndReconnect() store.setDataWithRetries(path, "newBytes".getBytes(), 0); stopServer(); + final AtomicBoolean isSucceeded = new AtomicBoolean(false); zkClientTester.watcher.waitForDisconnected(ZK_OP_WAIT_TIME); - try { - store.getDataWithRetries(path, true); - fail("Expected ZKClient time out exception"); - } catch (Exception e) { - assertTrue(e.getMessage().contains( - "Wait for ZKClient creation timed out")); - } + Thread thread = new Thread() { + @Override + public void run() { + try { + store.getDataWithRetries(path, true); + isSucceeded.set(true); + } catch (Exception e) { + fail("zk op failed"); + } + } + }; + thread.start(); // ZKRMStateStore Session restored startServer(); @@ -206,6 +212,8 @@ public void testZKClientDisconnectAndReconnect() fail(error); } assertEquals("newBytes", new String(ret)); + thread.join(); + assertTrue(isSucceeded.get()); } @Test(timeout = 20000)