Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 1307629) +++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -32,9 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.NoSuchElementException; import java.util.Set; -import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; @@ -540,6 +538,7 @@ private int refCount; // indicates whether this connection's life cycle is managed + // this also changes the behavior when the ZK connection is lost private final boolean managed; /** * constructor @@ -574,35 +573,45 @@ HConstants.HBASE_CLIENT_PREFETCH_LIMIT, HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT); - setupZookeeperTrackers(); + ensureZookeeperTrackers(); this.master = null; this.masterChecked = false; } - private synchronized void setupZookeeperTrackers() + private synchronized void ensureZookeeperTrackers() throws ZooKeeperConnectionException{ // initialize zookeeper and master address manager - this.zooKeeper = getZooKeeperWatcher(); - masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); - masterAddressTracker.start(); - - this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this); - this.rootRegionTracker.start(); - - this.clusterId = new ClusterId(this.zooKeeper, this); + try { + if (this.zooKeeper == null) { + this.zooKeeper = getZooKeeperWatcher(); + this.clusterId = new ClusterId(this.zooKeeper, this); + } + if (this.masterAddressTracker == null) { + masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); + masterAddressTracker.start(); + } + if (this.rootRegionTracker == null) { + this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this); + 
this.rootRegionTracker.start(); + } + } catch (ZooKeeperConnectionException e) { + resetZooKeeperTrackers(); + throw e; + } } - private synchronized void resetZooKeeperTrackers() - throws ZooKeeperConnectionException { - LOG.info("Trying to reconnect to zookeeper"); - masterAddressTracker.stop(); - masterAddressTracker = null; - rootRegionTracker.stop(); - rootRegionTracker = null; + private synchronized void resetZooKeeperTrackers() { + if (masterAddressTracker != null) { + masterAddressTracker.stop(); + masterAddressTracker = null; + } + if (rootRegionTracker != null) { + rootRegionTracker.stop(); + rootRegionTracker = null; + } clusterId = null; this.zooKeeper = null; - setupZookeeperTrackers(); } public Configuration getConfiguration() { @@ -623,6 +632,8 @@ LOG.info("Exception contacting master. Retrying...", ute.getCause()); } + if (!managed) ensureZookeeperTrackers(); + checkIfBaseNodeAvailable(); ServerName sn = null; synchronized (this.masterLock) { @@ -807,7 +818,7 @@ throw new IllegalArgumentException( "table name cannot be null or zero length"); } - + if (!managed) ensureZookeeperTrackers(); if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)) { try { ServerName servername = @@ -1255,6 +1266,7 @@ } else { rsName = Addressing.createHostAndPortStr(hostname, port); } + if (!managed) ensureZookeeperTrackers(); // See if we already have a connection (common case) server = this.servers.get(rsName); if (server == null) { @@ -1642,15 +1654,24 @@ @Override public void abort(final String msg, Throwable t) { - if (t instanceof KeeperException.SessionExpiredException) { + if (t instanceof KeeperException) { try { LOG.info("This client just lost it's session with ZooKeeper, trying" + " to reconnect."); - resetZooKeeperTrackers(); - LOG.info("Reconnected successfully. 
This disconnect could have been" + - " caused by a network partition or a long-running GC pause," + - " either way it's recommended that you verify your environment."); - return; + if (t instanceof KeeperException.SessionExpiredException) { + resetZooKeeperTrackers(); + if (managed) { + // if the connection is managed, attempt to reconnect immediately + ensureZookeeperTrackers(); + LOG.info("Reconnected successfully. This disconnect could have been" + + " caused by a network partition or a long-running GC pause," + + " either way it's recommended that you verify your environment."); + return; + } + } + // an unmanaged connection can continue, + // while a managed connection needs to continue with the abort + if (!managed) return; } catch (ZooKeeperConnectionException e) { LOG.error("Could not reconnect to ZooKeeper after session" + " expiration, aborting");