Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 1307695) +++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -32,9 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.NoSuchElementException; import java.util.Set; -import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; @@ -540,6 +538,7 @@ private int refCount; // indicates whether this connection's life cycle is managed + // this also changes the behavior when the ZK connection is lost private final boolean managed; /** * constructor @@ -574,35 +573,45 @@ HConstants.HBASE_CLIENT_PREFETCH_LIMIT, HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT); - setupZookeeperTrackers(); + ensureZookeeperTrackers(); this.master = null; this.masterChecked = false; } - private synchronized void setupZookeeperTrackers() + private synchronized void ensureZookeeperTrackers() throws ZooKeeperConnectionException{ // initialize zookeeper and master address manager - this.zooKeeper = getZooKeeperWatcher(); - masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); - masterAddressTracker.start(); - - this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this); - this.rootRegionTracker.start(); - - this.clusterId = new ClusterId(this.zooKeeper, this); + try { + if (this.zooKeeper == null) { + this.zooKeeper = getZooKeeperWatcher(); + this.clusterId = new ClusterId(this.zooKeeper, this); + } + if (this.masterAddressTracker == null) { + masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); + masterAddressTracker.start(); + } + if (this.rootRegionTracker == null) { + this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this); + 
this.rootRegionTracker.start(); + } + } catch (ZooKeeperConnectionException e) { + resetZooKeeperTrackers(); + throw e; + } } - private synchronized void resetZooKeeperTrackers() - throws ZooKeeperConnectionException { - LOG.info("Trying to reconnect to zookeeper"); - masterAddressTracker.stop(); - masterAddressTracker = null; - rootRegionTracker.stop(); - rootRegionTracker = null; + private synchronized void resetZooKeeperTrackers() { + if (masterAddressTracker != null) { + masterAddressTracker.stop(); + masterAddressTracker = null; + } + if (rootRegionTracker != null) { + rootRegionTracker.stop(); + rootRegionTracker = null; + } clusterId = null; this.zooKeeper = null; - setupZookeeperTrackers(); } public Configuration getConfiguration() { @@ -623,6 +632,8 @@ LOG.info("Exception contacting master. Retrying...", ute.getCause()); } + if (!managed) ensureZookeeperTrackers(); + checkIfBaseNodeAvailable(); ServerName sn = null; synchronized (this.masterLock) { @@ -807,11 +818,12 @@ throw new IllegalArgumentException( "table name cannot be null or zero length"); } - + if (!managed) ensureZookeeperTrackers(); if (Bytes.equals(tableName, HConstants.ROOT_TABLE_NAME)) { try { - ServerName servername = - this.rootRegionTracker.waitRootRegionLocation(this.rpcTimeout); + ServerName servername = managed ? + this.rootRegionTracker.waitRootRegionLocation(this.rpcTimeout) : + this.rootRegionTracker.getRootRegionLocation(); LOG.debug("Looked up root region location, connection=" + this + "; serverName=" + ((servername == null)? 
"": servername.toString())); if (servername == null) return null; @@ -1255,6 +1267,7 @@ } else { rsName = Addressing.createHostAndPortStr(hostname, port); } + if (!managed) ensureZookeeperTrackers(); // See if we already have a connection (common case) server = this.servers.get(rsName); if (server == null) { @@ -1642,15 +1655,24 @@ @Override public void abort(final String msg, Throwable t) { - if (t instanceof KeeperException.SessionExpiredException) { + if (t instanceof KeeperException) { try { LOG.info("This client just lost it's session with ZooKeeper, trying" + " to reconnect."); - resetZooKeeperTrackers(); - LOG.info("Reconnected successfully. This disconnect could have been" + - " caused by a network partition or a long-running GC pause," + - " either way it's recommended that you verify your environment."); - return; + if (t instanceof KeeperException.SessionExpiredException) { + resetZooKeeperTrackers(); + if (managed) { + // if the connection is managed, attempt to reconnect immediately + ensureZookeeperTrackers(); + LOG.info("Reconnected successfully. This disconnect could have been" + + " caused by a network partition or a long-running GC pause," + + " either way it's recommended that you verify your environment."); + return; + } + } + // an unmanaged connection can continue, + // while a managed connection needs to continue with the abort + if (!managed) return; } catch (ZooKeeperConnectionException e) { LOG.error("Could not reconnect to ZooKeeper after session" + " expiration, aborting");