diff --git hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index d9410d2..540fb4b 100644 --- hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -753,7 +753,7 @@ private boolean reEstablishSession() { return success; } - private void createConnection() throws IOException, KeeperException { + protected void createConnection() throws IOException, KeeperException { if (zkClient != null) { try { zkClient.close(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 5176477..a537a7a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -527,6 +527,9 @@ private static void addDeprecatedKeys() { public static final int DEFAULT_CLIENT_FAILOVER_RETRIES_ON_SOCKET_TIMEOUTS = 0; + /** Number of ZooKeeper operation retries in the RM ActiveStandbyElector. */ + public static final String RM_HA_FC_ELECTOR_ZK_OP_RETRIES_KEY = RM_HA_PREFIX + + "failover-controller.active-standby-elector.zk.op.retries"; //////////////////////////////// // RM state store configs //////////////////////////////// diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java
index 73bdca0..f65da36 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java
@@ -86,11 +86,52 @@ protected void serviceInit(Configuration conf)
     List zkAcls = RMZKUtils.getZKAcls(conf);
     List zkAuths = RMZKUtils.getZKAuths(conf);
 
-    int maxRetryNum = conf.getInt(
-        CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_KEY,
-        CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT);
+    // Prefer the RM-specific retry count; when it is unset, fall back to the
+    // common failover-controller key and its default.
+    final int maxRetryNum = conf.getInt(
+        YarnConfiguration.RM_HA_FC_ELECTOR_ZK_OP_RETRIES_KEY,
+        conf.getInt(
+            CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_KEY,
+            CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT));
+
     elector = new ActiveStandbyElector(zkQuorum, (int) zkSessionTimeout,
-        electionZNode, zkAcls, zkAuths, this, maxRetryNum);
+        electionZNode, zkAcls, zkAuths, this, maxRetryNum) {
+      /**
+       * Connects to ZooKeeper, making up to {@code maxRetryNum} attempts
+       * (always at least one) and calling {@code sleepFor(5000)} between
+       * consecutive attempts.
+       *
+       * @throws IOException if no attempt succeeds; the failure of the
+       *     last attempt is attached as the cause
+       */
+      @Override
+      protected void createConnection()
+          throws IOException, KeeperException {
+        // A configured value of 0 (or less) must still allow one attempt.
+        final int maxAttempts = Math.max(1, maxRetryNum);
+        Exception lastFailure = null;
+        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
+          LOG.debug("Establishing zookeeper connection for " + this);
+          try {
+            super.createConnection();
+            return;
+          } catch (IOException e) {
+            lastFailure = e;
+          } catch (KeeperException e) {
+            lastFailure = e;
+          }
+          LOG.warn("Zookeeper connection attempt " + attempt + " of "
+              + maxAttempts + " failed", lastFailure);
+          if (attempt < maxAttempts) {
+            // Only back off when another attempt will follow.
+            sleepFor(5000);
+          }
+        }
+        throw new IOException(
+            "Can not establish Zookeeper Connection for " + this,
+            lastFailure);
+      }
+    };
 
     elector.ensureParentZNode();
     if (!isParentZnodeSafe(clusterId)) {