Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 1307909)
+++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy)
@@ -32,9 +32,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.NoSuchElementException;
 import java.util.Set;
-import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
@@ -574,35 +572,48 @@
           HConstants.HBASE_CLIENT_PREFETCH_LIMIT,
           HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT);
 
-      setupZookeeperTrackers();
-
       this.master = null;
       this.masterChecked = false;
     }
 
-    private synchronized void setupZookeeperTrackers()
-        throws ZooKeeperConnectionException{
+    private synchronized void ensureZookeeperTrackers()
+        throws ZooKeeperConnectionException {
       // initialize zookeeper and master address manager
-      this.zooKeeper = getZooKeeperWatcher();
-      masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
-      masterAddressTracker.start();
-
-      this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
-      this.rootRegionTracker.start();
-
-      this.clusterId = new ClusterId(this.zooKeeper, this);
+      try {
+        boolean reconnect = false;
+        if (this.zooKeeper == null) {
+          this.zooKeeper = getZooKeeperWatcher();
+          this.clusterId = new ClusterId(this.zooKeeper, this);
+          reconnect = true;
+        }
+        if (this.masterAddressTracker == null) {
+          masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
+          masterAddressTracker.start();
+          reconnect = true;
+        }
+        if (this.rootRegionTracker == null) {
+          this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
+          this.rootRegionTracker.start();
+          reconnect = true;
+        }
+        if (reconnect) LOG.debug("(Re)connected to ZK successfully.");
+      } catch (ZooKeeperConnectionException e) {
+        resetZooKeeperTrackers();
+        throw e;
+      }
     }
 
-    private synchronized void resetZooKeeperTrackers()
-        throws ZooKeeperConnectionException {
-      LOG.info("Trying to reconnect to zookeeper");
-      masterAddressTracker.stop();
-      masterAddressTracker = null;
-      rootRegionTracker.stop();
-      rootRegionTracker = null;
+    private synchronized void resetZooKeeperTrackers() {
+      if (masterAddressTracker != null) {
+        masterAddressTracker.stop();
+        masterAddressTracker = null;
+      }
+      if (rootRegionTracker != null) {
+        rootRegionTracker.stop();
+        rootRegionTracker = null;
+      }
       clusterId = null;
       this.zooKeeper = null;
-      setupZookeeperTrackers();
     }
 
     public Configuration getConfiguration() {
@@ -623,6 +634,7 @@
         LOG.info("Exception contacting master. Retrying...", ute.getCause());
       }
 
+      ensureZookeeperTrackers();
       checkIfBaseNodeAvailable();
       ServerName sn = null;
       synchronized (this.masterLock) {
@@ -807,11 +819,10 @@
         throw new IllegalArgumentException(
             "table name cannot be null or zero length");
       }
-
+      ensureZookeeperTrackers();
       if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)) {
         try {
-          ServerName servername =
-            this.rootRegionTracker.waitRootRegionLocation(this.rpcTimeout);
+          ServerName servername = this.rootRegionTracker.getRootRegionLocation();
           LOG.debug("Looked up root region location, connection=" + this +
             "; serverName=" + ((servername == null)? "": servername.toString()));
           if (servername == null) return null;
@@ -1255,6 +1266,7 @@
       } else {
         rsName = Addressing.createHostAndPortStr(hostname, port);
       }
+      ensureZookeeperTrackers();
       // See if we already have a connection (common case)
       server = this.servers.get(rsName);
       if (server == null) {
@@ -1642,25 +1654,22 @@
 
     @Override
     public void abort(final String msg, Throwable t) {
-      if (t instanceof KeeperException.SessionExpiredException) {
-        try {
-          LOG.info("This client just lost it's session with ZooKeeper, trying" +
-              " to reconnect.");
-          resetZooKeeperTrackers();
-          LOG.info("Reconnected successfully. This disconnect could have been" +
+      LOG.debug("Abort", t);
+      if (t instanceof KeeperException) {
+        LOG.info("This client just lost it's session with ZooKeeper, will" +
+            " automatically reconnect when needed.");
+        if (t instanceof KeeperException.SessionExpiredException) {
+          LOG.info("ZK session expired. This disconnect could have been" +
               " caused by a network partition or a long-running GC pause," +
               " either way it's recommended that you verify your environment.");
-          return;
-        } catch (ZooKeeperConnectionException e) {
-          LOG.error("Could not reconnect to ZooKeeper after session" +
-              " expiration, aborting");
-          t = e;
+          resetZooKeeperTrackers();
         }
+        return;
       }
       if (t != null) LOG.fatal(msg, t);
       else LOG.fatal(msg);
       this.aborted = true;
-      this.closed = true;
+      close();
     }
 
     @Override