Index: src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (revision 1002359) +++ src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (working copy) @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.catalog; import java.io.IOException; +import java.net.ConnectException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; @@ -36,13 +37,13 @@ import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; -import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.util.Progressable; @@ -52,6 +53,7 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.Matchers; import org.mockito.Mockito; /** @@ -100,6 +102,61 @@ return ct; } + @Test public void testGetMetaServerConnectionFails() + throws IOException, InterruptedException, KeeperException { + HConnection connection = Mockito.mock(HConnection.class); + ConnectException connectException = + new ConnectException("Connection refused"); + final HRegionInterface implementation = + Mockito.mock(HRegionInterface.class); + Mockito.when(implementation.get((byte [])Mockito.any(), (Get)Mockito.any())). 
+ thenThrow(connectException); + Mockito.when(connection.getHRegionConnection((HServerAddress)Matchers.anyObject(), Matchers.anyBoolean())). + thenReturn(implementation); + Assert.assertNotNull(connection.getHRegionConnection(new HServerAddress(), false)); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + try { + RootLocationEditor.setRootLocation(this.watcher, + new HServerAddress("example.com:1234")); + Assert.assertFalse(ct.verifyMetaRegionLocation(100)); + } finally { + // Clean out root location or later tests will be confused... they presume + // start fresh in zk. + RootLocationEditor.deleteRootLocation(this.watcher); + } + } + + /** + * Test get of root region fails properly if nothing to connect to. + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test + public void testVerifyRootRegionLocationFails() + throws IOException, InterruptedException, KeeperException { + HConnection connection = Mockito.mock(HConnection.class); + ConnectException connectException = + new ConnectException("Connection refused"); + final HRegionInterface implementation = + Mockito.mock(HRegionInterface.class); + Mockito.when(implementation.getRegionInfo((byte [])Mockito.any())). + thenThrow(connectException); + Mockito.when(connection.getHRegionConnection((HServerAddress)Matchers.anyObject(), Matchers.anyBoolean())). + thenReturn(implementation); + Assert.assertNotNull(connection.getHRegionConnection(new HServerAddress(), false)); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + try { + RootLocationEditor.setRootLocation(this.watcher, + new HServerAddress("example.com:1234")); + Assert.assertFalse(ct.verifyRootRegionLocation(100)); + } finally { + // Clean out root location or later tests will be confused... they presume + // start fresh in zk. 
+ RootLocationEditor.deleteRootLocation(this.watcher); + } + } + @Test (expected = NotAllMetaRegionsOnlineException.class) public void testTimeoutWaitForRoot() throws IOException, InterruptedException { Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1002359) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -396,10 +396,6 @@ this.abortRequested = false; this.stopped = false; - - //HRegionInterface, - //HBaseRPCErrorHandler, Runnable, Watcher, Stoppable, OnlineRegions - // Server to handle client requests this.server = HBaseRPC.getServer(this, new Class>[]{HRegionInterface.class, HBaseRPCErrorHandler.class, @@ -429,19 +425,32 @@ } } + /** + * Bring up connection to zk ensemble and then wait until a master for this + * cluster and then after that, wait until cluster 'up' flag has been set. + * This is the order in which master does things. + * Finally put up a catalog tracker. + * @throws IOException + * @throws InterruptedException + */ private void initializeZooKeeper() throws IOException, InterruptedException { - // open connection to zookeeper and set primary watcher + // Open connection to zookeeper and set primary watcher zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + serverInfo.getServerAddress().getPort(), this); + // Create the master address manager, register with zk, and start it. Then + // block until a master is available. No point in starting up if no master + // running. + this.masterAddressManager = new MasterAddressTracker(zooKeeper, this); + this.masterAddressManager.start(); + this.masterAddressManager.blockUntilAvailable(); + + // Wait on cluster being up. Master will set this flag up in zookeeper + // when ready. 
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this); this.clusterStatusTracker.start(); this.clusterStatusTracker.blockUntilAvailable(); - // create the master address manager, register with zk, and start it - masterAddressManager = new MasterAddressTracker(zooKeeper, this); - masterAddressManager.start(); - // Create the catalog tracker and start it; this.catalogTracker = new CatalogTracker(this.zooKeeper, this.connection, this, this.conf.getInt("hbase.regionserver.catalog.timeout", Integer.MAX_VALUE)); Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1002359) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -28,6 +28,7 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicReference; +import org.apache.commons.lang.NotImplementedException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -290,11 +291,13 @@ /** * Main processing loop for the HMaster. - * 1. Handle both fresh cluster start as well as failed over initialization of - * the HMaster. - * 2. Start the necessary services - * 3. Reassign the root region - * 4. The master is no longer closed - set "closed" to false + *
.META. if available and waits
* for up to the specified timeout if not immediately available. Throws an
- * exception if timed out waiting.
+ * exception if timed out waiting. This method differs from {@link #waitForMeta()}
+ * in that it will go ahead and verify the location gotten from ZooKeeper by
+ * trying to use the returned connection.
* @param timeout maximum time to wait for meta availability, in milliseconds
* @return location of meta
* @throws InterruptedException if interrupted while waiting
@@ -282,15 +284,15 @@
public HServerAddress waitForMeta(long timeout)
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
long stop = System.currentTimeMillis() + timeout;
- synchronized(metaAvailable) {
- if(getMetaServerConnection(true) != null) {
+ synchronized (metaAvailable) {
+ if (getMetaServerConnection(true) != null) {
return metaLocation;
}
while(!metaAvailable.get() &&
(timeout == 0 || System.currentTimeMillis() < stop)) {
metaAvailable.wait(timeout);
}
- if(getMetaServerConnection(true) == null) {
+ if (getMetaServerConnection(true) == null) {
throw new NotAllMetaRegionsOnlineException(
"Timed out (" + timeout + "ms)");
}
@@ -365,6 +367,8 @@
return metaServer.getRegionInfo(regionName) != null;
} catch (NotServingRegionException e) {
t = e;
+ } catch (ConnectException e) {
+ t = e;
} catch (UndeclaredThrowableException e) {
// We can get a ConnectException wrapped by a UTE if client fails connect
// If not a ConnectException, rethrow.
@@ -377,6 +381,56 @@
}
/**
+ * Verify -ROOT- is deployed and accessible.
+ * @param timeout How long to wait on zk for root address (passed through to
+ * the internal call to {@link #waitForRootServerConnection(long)}).
+ * @return True if the -ROOT- location is healthy.
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public boolean verifyRootRegionLocation(final long timeout)
+ throws InterruptedException, IOException {
+ HRegionInterface implementation = null;
+ try {
+ implementation = waitForRootServerConnection(timeout);
+ } catch (NotAllMetaRegionsOnlineException e) {
+ // Pass
+ } catch (IOException e) {
+ // Unexpected exception
+ throw e;
+ }
+ if (implementation == null) return false;
+ return verifyRegionLocation(implementation,
+ HRegionInfo.ROOT_REGIONINFO.getRegionName());
+ }
+
+ /**
+ * Verify .META. is deployed and accessible.
+ * @param timeout How long to wait on zk for .META. address
+ * (passed through to the internal call to {@link #waitForMetaServerConnection(long)}).
+ * @return True if the .META. location is healthy.
+ * @throws IOException Some unexpected IOE.
+ * @throws InterruptedException
+ */
+ public boolean verifyMetaRegionLocation(final long timeout)
+ throws InterruptedException, IOException {
+ HRegionInterface implementation = null;
+ try {
+ implementation = waitForMetaServerConnection(timeout);
+ } catch (NotAllMetaRegionsOnlineException e) {
+ // Pass
+ } catch (NotServingRegionException e) {
+ // Pass
+ } catch (IOException e) {
+ // Unexpected.
+ throw e;
+ }
+ if (implementation == null) return false;
+ return verifyRegionLocation(implementation,
+ HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
+ }
+
+ /**
* Check if hsi was carrying -ROOT- or
* .META. and if so, clear out old locations.
* @param hsi Server that has crashed/shutdown.
Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 1002359)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (working copy)
@@ -20,6 +20,7 @@
package org.apache.hadoop.hbase.ipc;
import java.io.IOException;
+import java.net.ConnectException;
import java.util.List;
import java.util.NavigableSet;
@@ -54,7 +55,7 @@
* @throws NotServingRegionException e
*/
public HRegionInfo getRegionInfo(final byte [] regionName)
- throws NotServingRegionException;
+ throws NotServingRegionException, ConnectException;
/**
* Return all the data for the row that matches row exactly,