HBase: fail fast when the configured 'zookeeper.znode.parent' does not exist.

Clients (HConnectionManager, RootRegionTracker) and region servers now check
that the base znode exists before waiting on ZooKeeper. ZooKeeperWatcher only
creates the base znodes when constructed with canCreateBaseZNode=true, which
the master and the tests touched below now pass.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts are consistent, but the
whitespace of context lines is reconstructed -- confirm against the target
revision or apply with `patch -l`.

Index: src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 1151399)
+++ src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy)
@@ -570,7 +570,7 @@
           return master;
         }
       }
-
+      checkIfBaseNodeAvailable();
       ServerName sn = null;
       synchronized (this.masterLock) {
         for (int tries = 0;
@@ -632,6 +632,20 @@
       return this.master;
     }
 
+    /**
+     * Verifies that the base znode configured through 'zookeeper.znode.parent'
+     * exists before the master lookup starts its retry loop.
+     * @throws MasterNotRunningException if the base znode does not exist
+     */
+    private void checkIfBaseNodeAvailable() throws MasterNotRunningException {
+      if (!masterAddressTracker.checkIfBaseNodeAvailable()) {
+        String errorMsg = "ZooKeeper available but base node mismatch. Check the value configured in the 'zookeeper.znode.parent'. "
+            + "There could be a mismatch with the one configured in the master.";
+        LOG.error(errorMsg);
+        throw new MasterNotRunningException(errorMsg);
+      }
+    }
+
     public boolean isMasterRunning()
     throws MasterNotRunningException, ZooKeeperConnectionException {
       if (this.master == null) {
Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1151399)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -238,7 +238,7 @@
     if (this.conf.get("mapred.task.id") == null) {
       this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString());
     }
-    this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this);
+    this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
     this.metrics = new MasterMetrics(getServerName().toString());
   }
 
@@ -1218,7 +1218,7 @@
   private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
       IOException, KeeperException {
     this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":"
-        + this.serverName.getPort(), this);
+        + this.serverName.getPort(), this, true);
 
     MonitoredTask status =
       TaskMonitor.get().createStatus("Recovering expired ZK session");
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1151401)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -556,6 +556,13 @@
    */
   private void blockAndCheckIfStopped(ZooKeeperNodeTracker tracker)
       throws IOException, InterruptedException {
+    if (!tracker.checkIfBaseNodeAvailable()) {
+      String errorMsg = "Base node " + this.zooKeeper.baseZNode
+          + " not found. Check the value configured in 'zookeeper.znode.parent'. "
+          + "There could be a mismatch with the one configured in the master.";
+      LOG.error(errorMsg);
+      abort(errorMsg);
+    }
     while (tracker.blockUntilAvailable(this.msgInterval) == null) {
       if (this.stopped) {
         throw new IOException("Received the shutdown message while waiting.");
Index: src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java (revision 1151399)
+++ src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java (working copy)
@@ -71,6 +71,15 @@
    */
   public ServerName waitRootRegionLocation(long timeout)
   throws InterruptedException {
+    if (!checkIfBaseNodeAvailable()) {
+      String errorMsg = "Base node "
+          + watcher.baseZNode
+          + " not found. Check the value configured in 'zookeeper.znode.parent'. "
+          + "There could be a mismatch with the one configured in the master.";
+      LOG.error(errorMsg);
+      throw new IllegalArgumentException(
+          errorMsg);
+    }
     return dataToServerName(super.blockUntilAvailable(timeout));
   }
 
Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (revision 1151399)
+++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (working copy)
@@ -19,6 +19,8 @@
  */
 package org.apache.hadoop.hbase.zookeeper;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.zookeeper.KeeperException;
 
@@ -32,6 +34,8 @@
  * RegionServers.
  */
 public abstract class ZooKeeperNodeTracker extends ZooKeeperListener {
+
+  static final Log LOG = LogFactory.getLog(ZooKeeperNodeTracker.class);
   /** Path of node being tracked */
   protected final String node;
 
@@ -179,4 +183,24 @@
       nodeCreated(path);
     }
   }
+
+  /**
+   * Checks if the baseznode set as per the property 'zookeeper.znode.parent'
+   * exists.
+   * @return true if baseznode exists.
+   *         false if it does not exist.
+   */
+  public boolean checkIfBaseNodeAvailable() {
+    try {
+      if (ZKUtil.checkExists(watcher, watcher.baseZNode) == -1) {
+        return false;
+      }
+    } catch (KeeperException e) {
+      abortable
+          .abort(
+              "Unexpected exception handling while checking if basenode exists.",
+              e);
+    }
+    return true;
+  }
 }
Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 1151399)
+++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy)
@@ -100,10 +100,23 @@
    * @throws ZooKeeperConnectionException
    */
   public ZooKeeperWatcher(Configuration conf, String descriptor,
-      Abortable abortable)
-  throws IOException, ZooKeeperConnectionException {
+      Abortable abortable) throws IOException, ZooKeeperConnectionException {
+    this(conf, descriptor, abortable, false);
+  }
+  /**
+   * Instantiate a ZooKeeper connection and watcher.
+   *
+   * @param descriptor Descriptive string that is added to zookeeper sessionid
+   *          and used as identifier for this instance.
+   * @param canCreateBaseZNode true if this watcher may create the base znodes
+   * @throws IOException
+   * @throws ZooKeeperConnectionException
+   */
+  public ZooKeeperWatcher(Configuration conf, String descriptor,
+      Abortable abortable, boolean canCreateBaseZNode) throws IOException,
+      ZooKeeperConnectionException {
     this.conf = conf;
-    // Capture a stack trace now.  Will print it out later if problem so we can
+    // Capture a stack trace now. Will print it out later if problem so we can
     // distingush amongst the myriad ZKWs.
     try {
       throw new Exception("ZKW CONSTRUCTOR STACK TRACE FOR DEBUGGING");
@@ -117,12 +130,19 @@
     this.abortable = abortable;
     setNodeNames(conf);
     this.zooKeeper = ZKUtil.connect(conf, quorum, this, descriptor);
+    if (canCreateBaseZNode) {
+      createBaseZNodes(conf);
+    }
+  }
+
+  private void createBaseZNodes(Configuration conf)
+      throws ZooKeeperConnectionException {
     try {
       // Create all the necessary "directories" of znodes
       // TODO: Move this to an init method somewhere so not everyone calls it?
 
-      // The first call against zk can fail with connection loss.  Seems common.
-      // Apparently this is recoverable.  Retry a while.
+      // The first call against zk can fail with connection loss. Seems common.
+      // Apparently this is recoverable. Retry a while.
       // See http://wiki.apache.org/hadoop/ZooKeeper/ErrorHandling
       // TODO: Generalize out in ZKUtil.
       long wait = conf.getLong(HConstants.ZOOKEEPER_RECOVERABLE_WAITTIME,
@@ -136,10 +156,10 @@
           break;
         } catch (KeeperException.ConnectionLossException e) {
           if (LOG.isDebugEnabled() && (isFinishedRetryingRecoverable(finished))) {
-            LOG.debug("Retrying zk create for another " +
-              (finished - System.currentTimeMillis()) +
-              "ms; set 'hbase.zookeeper.recoverable.waittime' to change " +
-              "wait time); " + e.getMessage());
+            LOG.debug("Retrying zk create for another "
+                + (finished - System.currentTimeMillis())
+                + "ms; set 'hbase.zookeeper.recoverable.waittime' to change "
+                + "wait time); " + e.getMessage());
           }
           ke = e;
         }
@@ -153,12 +173,13 @@
           Thread.currentThread().interrupt();
           LOG.warn("Interrupted while closing", e);
         }
-        throw new ZooKeeperConnectionException("HBase is able to connect to" +
-          " ZooKeeper but the connection closes immediately. This could be" +
-          " a sign that the server has too many connections (30 is the" +
-          " default). Consider inspecting your ZK server logs for that" +
-          " error and then make sure you are reusing HBaseConfiguration" +
-          " as often as you can. See HTable's javadoc for more information.",
+        throw new ZooKeeperConnectionException(
+            "HBase is able to connect to"
+                + " ZooKeeper but the connection closes immediately. This could be"
+                + " a sign that the server has too many connections (30 is the"
+                + " default). Consider inspecting your ZK server logs for that"
+                + " error and then make sure you are reusing HBaseConfiguration"
+                + " as often as you can. See HTable's javadoc for more information.",
           ke);
       }
       ZKUtil.createAndFailSilent(this, assignmentZNode);
@@ -170,7 +191,7 @@
           prefix("Unexpected KeeperException creating base node"), e);
     }
   }
-  
+
   private boolean isFinishedRetryingRecoverable(final long finished) {
     return System.currentTimeMillis() < finished;
   }
Index: src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (revision 1151399)
+++ src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (working copy)
@@ -84,7 +84,7 @@
       }
     };
     this.watcher = new ZooKeeperWatcher(UTIL.getConfiguration(),
-      this.getClass().getSimpleName(), this.abortable);
+      this.getClass().getSimpleName(), this.abortable, true);
   }
 
   @After public void after() {
Index: src/test/java/org/apache/hadoop/hbase/client/TestZookeeperBaseNode.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/client/TestZookeeperBaseNode.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/client/TestZookeeperBaseNode.java (revision 0)
@@ -0,0 +1,92 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.Test;
+
+public class TestZookeeperBaseNode {
+
+  private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private final static byte[] TABLENAME = Bytes.toBytes("TestHTablePool");
+
+  @After
+  public void tearDown() throws IOException {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testShouldThrowErrorIfTablesNotAbleToLocateTheBaseNode()
+      throws Exception {
+    Configuration config = TEST_UTIL.getConfiguration();
+    config.set("zookeeper.znode.parent", "/baseZNode");
+    TEST_UTIL.startMiniCluster(1);
+    TEST_UTIL.createTable(TABLENAME, HConstants.CATALOG_FAMILY);
+    // Point the client at a parent znode the running cluster was not started with.
+    config = TEST_UTIL.getConfiguration();
+    config.set("zookeeper.znode.parent", "/hbase");
+    try {
+      new HTable(config, TABLENAME);
+      fail("Should fail if the basenode is changed.");
+    } catch (IllegalArgumentException expected) {
+    }
+  }
+
+  @Test
+  public void testShouldThrowErrorIfTheBaseZNodeIsNotFoundWhileGettingTheMaster()
+      throws Exception {
+    Configuration config = TEST_UTIL.getConfiguration();
+    config.set("zookeeper.znode.parent", "/baseZNode");
+    TEST_UTIL.startMiniCluster(1);
+    config = TEST_UTIL.getConfiguration();
+    config.set("zookeeper.znode.parent", "/hbase");
+    try {
+      new HBaseAdmin(config);
+      fail("Should fail if the basenode is changed.");
+    } catch (MasterNotRunningException expected) {
+    }
+  }
+
+  @Test
+  public void testShouldThrowExceptionIfRSIsStartedButTheBaseNodeIsMisMatchingWithMaster()
+      throws Exception {
+    TEST_UTIL.getConfiguration().set("zookeeper.znode.parent", "/baseZNode");
+    TEST_UTIL.startMiniCluster(1);
+    MiniHBaseCluster miniHBaseCluster = TEST_UTIL.getMiniHBaseCluster();
+    miniHBaseCluster.stopRegionServer(0);
+    miniHBaseCluster.waitOnRegionServer(0);
+    miniHBaseCluster.stopMaster(0);
+    miniHBaseCluster.waitOnMaster(0);
+    miniHBaseCluster.getConfiguration().set("zookeeper.znode.parent", "/hbase");
+    miniHBaseCluster.startRegionServer();
+    assertTrue("RS should be stopped.", miniHBaseCluster.getRegionServer(0).isStopped());
+  }
+}
Index: src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java (revision 1151399)
+++ src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java (working copy)
@@ -67,7 +67,7 @@
   @Test (timeout=300000) public void testRestartClusterAfterKill()
   throws Exception {
     UTIL.startMiniZKCluster();
-    zooKeeper = new ZooKeeperWatcher(UTIL.getConfiguration(), "cluster1", null);
+    zooKeeper = new ZooKeeperWatcher(UTIL.getConfiguration(), "cluster1", null, true);
 
     // create the unassigned region, throw up a region opened state for META
     String unassignedZNode = zooKeeper.assignmentZNode;
Index: src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (revision 1151399)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (working copy)
@@ -76,7 +76,7 @@
     final ZooKeeperWatcher zk;
 
     MockServer() throws ZooKeeperConnectionException, IOException {
-      this.zk = new ZooKeeperWatcher(HTU.getConfiguration(), NAME.toString(), this);
+      this.zk = new ZooKeeperWatcher(HTU.getConfiguration(), NAME.toString(), this, true);
     }
 
     @Override
Index: src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java (revision 1151399)
+++ src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java (working copy)
@@ -104,7 +104,7 @@
     utility1 = new HBaseTestingUtility(conf1);
     utility1.startMiniZKCluster();
     MiniZooKeeperCluster miniZK = utility1.getZkCluster();
-    zkw1 = new ZooKeeperWatcher(conf1, "cluster1", null);
+    zkw1 = new ZooKeeperWatcher(conf1, "cluster1", null, true);
     admin = new ReplicationAdmin(conf1);
     LOG.info("Setup first Zk");
 
@@ -116,7 +116,7 @@
 
     utility2 = new HBaseTestingUtility(conf2);
     utility2.setZkCluster(miniZK);
-    zkw2 = new ZooKeeperWatcher(conf2, "cluster2", null);
+    zkw2 = new ZooKeeperWatcher(conf2, "cluster2", null, true);
 
     slaveClusterKey = conf2.get(HConstants.ZOOKEEPER_QUORUM)+":" +
             conf2.get("hbase.zookeeper.property.clientPort")+":/2";
Index: src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java (revision 1151399)
+++ src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java (working copy)
@@ -60,7 +60,7 @@
       }
     };
     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
-      name, abortable);
+      name, abortable, true);
     ZKTable zkt = new ZKTable(zkw);
     assertTrue(zkt.isEnabledTable(name));
     assertFalse(zkt.isDisablingTable(name));