Index: src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1236536) +++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (working copy) @@ -170,8 +170,8 @@ public void start() throws IOException, InterruptedException { try { - this.rootRegionTracker.start(true); - this.metaNodeTracker.start(true); + this.rootRegionTracker.start(); + this.metaNodeTracker.start(); LOG.debug("Starting catalog tracker " + this); }catch (RuntimeException e){ Throwable t = e.getCause(); Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 1236536) +++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -444,6 +444,8 @@ private volatile boolean closed; private volatile HMasterInterface master; private volatile boolean masterChecked; + private volatile boolean isResettingZKTrackers; + // ZooKeeper reference private ZooKeeperWatcher zooKeeper; // ZooKeeper-based master address tracker @@ -511,25 +513,28 @@ HConstants.HBASE_CLIENT_PREFETCH_LIMIT, HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT); - setupZookeeperTrackers(true); + setupZookeeperTrackers(); this.master = null; this.masterChecked = false; + this.isResettingZKTrackers = false; } - private boolean setupZookeeperTrackers(boolean allowAbort) + private boolean setupZookeeperTrackers() throws ZooKeeperConnectionException{ // initialize zookeeper and master address manager this.zooKeeper = getZooKeeperWatcher(); this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this); - if (!this.masterAddressTracker.start(allowAbort)) { + if (!this.masterAddressTracker.start()) { + 
this.zooKeeper.close(); this.masterAddressTracker.stop(); this.masterAddressTracker = null; this.zooKeeper = null; return false; } - if (!this.rootRegionTracker.start(allowAbort)) { + if (!this.rootRegionTracker.start()) { + this.zooKeeper.close(); this.masterAddressTracker.stop(); this.rootRegionTracker.stop(); this.masterAddressTracker = null; @@ -544,33 +549,41 @@ public synchronized void resetZooKeeperTrackersWithRetries() throws ZooKeeperConnectionException { LOG.info("Trying to reconnect to zookeeper."); - if (this.masterAddressTracker != null) { - this.masterAddressTracker.stop(); - this.masterAddressTracker = null; - } - if (this.rootRegionTracker != null) { - this.rootRegionTracker.stop(); - this.rootRegionTracker = null; - } - this.zooKeeper = null; - for (int tries = 0; tries < this.numRetries; tries++) { - boolean isLastTime = (tries == (this.numRetries - 1)); - try { - if (setupZookeeperTrackers(isLastTime)) { - break; + this.isResettingZKTrackers = true; + try { + if (this.masterAddressTracker != null) { + this.masterAddressTracker.stop(); + this.masterAddressTracker = null; + } + if (this.rootRegionTracker != null) { + this.rootRegionTracker.stop(); + this.rootRegionTracker = null; + } + if (this.zooKeeper != null) { + this.zooKeeper.close(); + this.zooKeeper = null; + } + for (int tries = 0; tries < this.numRetries; tries++) { + try { + if (setupZookeeperTrackers()) { + break; + } + } catch (ZooKeeperConnectionException zkce) { + // Last attempt: rethrow instead of swallowing the connection failure. + if (tries >= this.numRetries - 1) { + throw zkce; + } } - } catch (ZooKeeperConnectionException zkce) { - if (isLastTime) { - throw zkce; + LOG.info("Tried to reconnect to zookeeper but failed, already tried " + + tries + " times."); + try { + Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries)); + } catch (InterruptedException e1) { + Thread.currentThread().interrupt(); } } - LOG.info("Tried to reconnect to zookeeper but failed, already tried " - + tries + " times."); - try { - 
Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries)); - } catch (InterruptedException e1) { - Thread.currentThread().interrupt(); - } + } finally { + this.isResettingZKTrackers = false; } } @@ -1569,6 +1582,9 @@ public void abort(final String msg, Throwable t) { if ((t instanceof KeeperException.SessionExpiredException) || (t instanceof KeeperException.ConnectionLossException)) { + if (this.isResettingZKTrackers) { + return; + } try { LOG.info("This client just lost it's session with ZooKeeper, trying" + " to reconnect."); Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1236536) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -362,7 +362,7 @@ // Set the cluster as up. If new RSs, they'll be waiting on this before // going ahead with their startup. this.clusterStatusTracker = new ClusterStatusTracker(getZooKeeper(), this); - this.clusterStatusTracker.start(true); + this.clusterStatusTracker.start(); boolean wasUp = this.clusterStatusTracker.isClusterUp(); if (!wasUp) this.clusterStatusTracker.setClusterUp(); Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1236536) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -489,13 +489,13 @@ // block until a master is available. No point in starting up if no master // running. this.masterAddressManager = new MasterAddressTracker(this.zooKeeper, this); - this.masterAddressManager.start(true); + this.masterAddressManager.start(); blockAndCheckIfStopped(this.masterAddressManager); // Wait on cluster being up. Master will set this flag up in zookeeper // when ready. 
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this); - this.clusterStatusTracker.start(true); + this.clusterStatusTracker.start(); blockAndCheckIfStopped(this.clusterStatusTracker); // Create the catalog tracker and start it; Index: src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java (revision 1236536) +++ src/main/java/org/apache/hadoop/hbase/replication/ReplicationZookeeper.java (working copy) @@ -171,7 +171,7 @@ // Set a tracker on replicationStateNodeNode this.statusTracker = new ReplicationStatusTracker(this.zookeeper, abortable); - statusTracker.start(true); + statusTracker.start(); readReplicationStateZnode(); } Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (revision 1236536) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (working copy) @@ -71,7 +71,7 @@ * KeeperException occur. * @return start result. true if start successfully. 
*/ - public synchronized boolean start(boolean allowAbort) { + public synchronized boolean start() { this.watcher.registerListener(this); try { if(ZKUtil.watchAndCheckExists(watcher, node)) { @@ -80,12 +80,12 @@ this.data = data; } else { // It existed but now does not, try again to ensure a watch is set - return start(allowAbort); + return start(); } } return true; } catch (KeeperException e) { - if (allowAbort && (abortable != null)) { + if (abortable != null) { abortable.abort("Unexpected exception during initialization, aborting", e); } Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java (revision 1236536) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestMasterAddressManager.java (working copy) @@ -64,7 +64,7 @@ // Should not have a master yet MasterAddressTracker addressManager = new MasterAddressTracker(zk, null); - addressManager.start(true); + addressManager.start(); assertFalse(addressManager.hasMaster()); zk.registerListener(addressManager); Index: src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java (revision 1236536) +++ src/test/java/org/apache/hadoop/hbase/zookeeper/TestZooKeeperNodeTracker.java (working copy) @@ -72,7 +72,7 @@ ZooKeeperWatcher zk = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), "testInterruptible", abortable); final TestTracker tracker = new TestTracker(zk, "/xyz", abortable); - tracker.start(true); + tracker.start(); Thread t = new Thread() { @Override public void run() { @@ -105,7 +105,7 @@ // Start a ZKNT with no node currently available TestTracker localTracker = new TestTracker(zk, node, abortable); - localTracker.start(true); + localTracker.start(); 
zk.registerListener(localTracker); // Make sure we don't have a node @@ -120,7 +120,7 @@ // Now, start a new ZKNT with the node already available TestTracker secondTracker = new TestTracker(zk, node, null); - secondTracker.start(true); + secondTracker.start(); zk.registerListener(secondTracker); // Put up an additional zk listener so we know when zk event is done @@ -213,7 +213,7 @@ public WaitToGetDataThread(ZooKeeperWatcher zk, String node) { tracker = new TestTracker(zk, node, null); - tracker.start(true); + tracker.start(); zk.registerListener(tracker); hasData = false; }