Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java (revision 1311874)
+++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java (working copy)
@@ -267,15 +267,46 @@
    */
   public static int checkExists(ZooKeeperWatcher zkw, String znode)
   throws KeeperException {
+    return checkExists(zkw, znode, 0);
+  }
+
+  /**
+   * Check if the specified node exists, retrying until the timeout expires.  Sets no watches.
+   *
+   * @param zkw zk reference
+   * @param znode path of node to check
+   * @param timeout maximum time in ms to keep retrying; 0 or less means a single attempt
+   * @return version of the node if it exists, -1 if it does not exist within the timeout
+   * @throws KeeperException if unexpected zookeeper exception
+   */
+  public static int checkExists(ZooKeeperWatcher zkw, String znode, int timeout)
+  throws KeeperException {
+    long finished = System.currentTimeMillis() + timeout;
+    KeeperException keeperEx = null;
     try {
-      Stat s = zkw.getRecoverableZooKeeper().exists(znode, null);
-      return s != null ? s.getVersion() : -1;
-    } catch (KeeperException e) {
-      LOG.warn(zkw.prefix("Unable to set watcher on znode (" + znode + ")"), e);
-      zkw.keeperException(e);
-      return -1;
+      while (true) {
+        try {
+          Stat s = zkw.getRecoverableZooKeeper().exists(znode, null);
+          if (s != null) return s.getVersion();
+          keeperEx = null;
+        } catch (KeeperException e) {
+          keeperEx = e;
+        }
+
+        if (System.currentTimeMillis() >= finished) {
+          // Deadline reached (at once when timeout is 0, so no sleep/retry): a pending
+          // exception means the lookup failed, otherwise the znode doesn't exist.
+          if (keeperEx != null) {
+            LOG.warn(zkw.prefix("Unable to get znode (" + znode + ")"), keeperEx);
+            zkw.keeperException(keeperEx);
+          }
+          return -1;
+        }
+
+        Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
+      }
     } catch (InterruptedException e) {
-      LOG.warn(zkw.prefix("Unable to set watcher on znode (" + znode + ")"), e);
+      LOG.warn(zkw.prefix("Unable to get znode (" + znode + ")"), e);
       zkw.interruptedException(e);
       return -1;
     }
@@ -1257,4 +1288,5 @@
     int port = Addressing.parsePort(str);
     return new ServerName(hostname, port, -1L);
   }
-}
\ No newline at end of file
+}
+
Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (revision 1311874)
+++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (working copy)
@@ -234,8 +234,18 @@
    * false if doesnot exists.
    */
   public boolean checkIfBaseNodeAvailable() {
+    return checkIfBaseNodeAvailable(0);
+  }
+
+  /**
+   * Checks if the baseznode set as per the property 'zookeeper.znode.parent'
+   * exists, passing the given timeout on to ZKUtil.checkExists.
+   * @param timeout maximum time in ms to keep checking before the znode is considered missing.
+   * @return true if baseznode exists.
+   */
+  public boolean checkIfBaseNodeAvailable(int timeout) {
     try {
-      if (ZKUtil.checkExists(watcher, watcher.baseZNode) == -1) {
+      if (ZKUtil.checkExists(watcher, watcher.baseZNode, timeout) == -1) {
         return false;
       }
     } catch (KeeperException e) {
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1311874)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -607,7 +607,8 @@
    */
   private void blockAndCheckIfStopped(ZooKeeperNodeTracker tracker)
       throws IOException, InterruptedException {
-    if (false == tracker.checkIfBaseNodeAvailable()) {
+    int timeout = conf.getInt("hbase.basenode.avail.timeout", 1000);
+    if (false == tracker.checkIfBaseNodeAvailable(timeout)) {
       String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. "
           + "There could be a mismatch with the one configured in the master.";
       LOG.error(errorMsg);