Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 1002348) +++ CHANGES.txt (revision 1002880) @@ -552,6 +552,7 @@ HBASE-2995 Incorrect dependency on Log class from Jetty HBASE-3038 WALReaderFSDataInputStream.getPos() fails if Filesize > MAX_INT (Nicolas Spiegelberg via Stack) + HBASE-3047 If new master crashes, restart is messy IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable Index: src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (revision 1002348) +++ src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (revision 1002880) @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.catalog; import java.io.IOException; +import java.net.ConnectException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; @@ -36,13 +37,13 @@ import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; -import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.util.Progressable; @@ -52,6 +53,7 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.Matchers; import org.mockito.Mockito; /** @@ -100,6 +102,61 @@ return ct; } + @Test public void testGetMetaServerConnectionFails() + throws IOException, InterruptedException, 
KeeperException { + HConnection connection = Mockito.mock(HConnection.class); + ConnectException connectException = + new ConnectException("Connection refused"); + final HRegionInterface implementation = + Mockito.mock(HRegionInterface.class); + Mockito.when(implementation.get((byte [])Mockito.any(), (Get)Mockito.any())). + thenThrow(connectException); + Mockito.when(connection.getHRegionConnection((HServerAddress)Matchers.anyObject(), Matchers.anyBoolean())). + thenReturn(implementation); + Assert.assertNotNull(connection.getHRegionConnection(new HServerAddress(), false)); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + try { + RootLocationEditor.setRootLocation(this.watcher, + new HServerAddress("example.com:1234")); + Assert.assertFalse(ct.verifyMetaRegionLocation(100)); + } finally { + // Clean out root location or later tests will be confused... they presume + // start fresh in zk. + RootLocationEditor.deleteRootLocation(this.watcher); + } + } + + /** + * Test get of root region fails properly if nothing to connect to. + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test + public void testVerifyRootRegionLocationFails() + throws IOException, InterruptedException, KeeperException { + HConnection connection = Mockito.mock(HConnection.class); + ConnectException connectException = + new ConnectException("Connection refused"); + final HRegionInterface implementation = + Mockito.mock(HRegionInterface.class); + Mockito.when(implementation.getRegionInfo((byte [])Mockito.any())). + thenThrow(connectException); + Mockito.when(connection.getHRegionConnection((HServerAddress)Matchers.anyObject(), Matchers.anyBoolean())). 
+ thenReturn(implementation); + Assert.assertNotNull(connection.getHRegionConnection(new HServerAddress(), false)); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + try { + RootLocationEditor.setRootLocation(this.watcher, + new HServerAddress("example.com:1234")); + Assert.assertFalse(ct.verifyRootRegionLocation(100)); + } finally { + // Clean out root location or later tests will be confused... they presume + // start fresh in zk. + RootLocationEditor.deleteRootLocation(this.watcher); + } + } + @Test (expected = NotAllMetaRegionsOnlineException.class) public void testTimeoutWaitForRoot() throws IOException, InterruptedException { Index: src/main/java/org/apache/hadoop/hbase/RemoteExceptionHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/RemoteExceptionHandler.java (revision 1002348) +++ src/main/java/org/apache/hadoop/hbase/RemoteExceptionHandler.java (revision 1002880) @@ -79,6 +79,8 @@ * @throws IOException indicating a server error ocurred if the decoded * exception is not an IOException. The decoded exception is set as * the cause. + * @deprecated Use {@link RemoteException#unwrapRemoteException()} instead. + * In fact we should look into deprecating this whole class - St.Ack 2010929 */ public static IOException decodeRemoteException(final RemoteException re) throws IOException { Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (revision 1002348) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (revision 1002880) @@ -32,11 +32,6 @@ * RegionServers. */ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener { - /** - * Pass this if you do not want a timeout. 
- */ - public final static long NO_TIMEOUT = -1; - /** Path of node being tracked */ protected final String node; @@ -94,7 +89,7 @@ */ public synchronized byte [] blockUntilAvailable() throws InterruptedException { - return blockUntilAvailable(NO_TIMEOUT); + return blockUntilAvailable(0); } /** @@ -102,18 +97,22 @@ * specified timeout has elapsed. * * @param timeout maximum time to wait for the node data to be available, - * in milliseconds. Pass {@link #NO_TIMEOUT} for no timeout. + * n milliseconds. Pass 0 for no timeout. * @return data of the node * @throws InterruptedException if the waiting thread is interrupted */ public synchronized byte [] blockUntilAvailable(long timeout) throws InterruptedException { - if (timeout != NO_TIMEOUT && timeout < 0) throw new IllegalArgumentException(); + if (timeout < 0) throw new IllegalArgumentException(); + boolean notimeout = timeout == 0; long startTime = System.currentTimeMillis(); long remaining = timeout; - while ((remaining == NO_TIMEOUT || remaining > 0) && this.data == null) { - if (remaining == NO_TIMEOUT) wait(); - else wait(remaining); + while ((notimeout || remaining > 0) && this.data == null) { + if (notimeout) { + wait(); + continue; + } + wait(remaining); remaining = timeout - (System.currentTimeMillis() - startTime); } return data; Index: src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java (revision 1002348) +++ src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java (revision 1002880) @@ -68,6 +68,7 @@ public Leases(final int leasePeriod, final int leaseCheckFrequency) { this.leasePeriod = leasePeriod; this.leaseCheckFrequency = leaseCheckFrequency; + setDaemon(true); } /** Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- 
src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1002348) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1002880) @@ -48,7 +48,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReentrantReadWriteLock; -import com.google.common.base.Function; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -57,7 +56,6 @@ import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.HMsg; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; @@ -72,6 +70,7 @@ import org.apache.hadoop.hbase.UnknownRowLockException; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.YouAreDeadException; +import org.apache.hadoop.hbase.HConstants.OperationStatusCode; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.RootLocationEditor; @@ -120,9 +119,10 @@ import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.net.DNS; -import org.apache.hadoop.util.StringUtils; import org.apache.zookeeper.KeeperException; +import com.google.common.base.Function; + /** * HRegionServer makes a set of HRegions available to clients. It checks in with * the HMaster. There are many HRegionServers in a single HBase deployment. 
@@ -396,10 +396,6 @@ this.abortRequested = false; this.stopped = false; - - //HRegionInterface, - //HBaseRPCErrorHandler, Runnable, Watcher, Stoppable, OnlineRegions - // Server to handle client requests this.server = HBaseRPC.getServer(this, new Class>[]{HRegionInterface.class, HBaseRPCErrorHandler.class, @@ -429,19 +425,32 @@ } } + /** + * Bring up connection to zk ensemble and then wait until a master for this + * cluster and then after that, wait until cluster 'up' flag has been set. + * This is the order in which master does things. + * Finally put up a catalog tracker. + * @throws IOException + * @throws InterruptedException + */ private void initializeZooKeeper() throws IOException, InterruptedException { - // open connection to zookeeper and set primary watcher + // Open connection to zookeeper and set primary watcher zooKeeper = new ZooKeeperWatcher(conf, REGIONSERVER + serverInfo.getServerAddress().getPort(), this); + // Create the master address manager, register with zk, and start it. Then + // block until a master is available. No point in starting up if no master + // running. + this.masterAddressManager = new MasterAddressTracker(zooKeeper, this); + this.masterAddressManager.start(); + this.masterAddressManager.blockUntilAvailable(); + + // Wait on cluster being up. Master will set this flag up in zookeeper + // when ready. 
this.clusterStatusTracker = new ClusterStatusTracker(this.zooKeeper, this); this.clusterStatusTracker.start(); this.clusterStatusTracker.blockUntilAvailable(); - // create the master address manager, register with zk, and start it - masterAddressManager = new MasterAddressTracker(zooKeeper, this); - masterAddressManager.start(); - // Create the catalog tracker and start it; this.catalogTracker = new CatalogTracker(this.zooKeeper, this.connection, this, this.conf.getInt("hbase.regionserver.catalog.timeout", Integer.MAX_VALUE)); Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1002348) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1002880) @@ -81,6 +81,7 @@ this.fs = FileSystem.get(conf); // set up the archived logs path this.oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); + createInitialFileSystemLayout(); } /** @@ -91,8 +92,9 @@ * *
.META. and -ROOT- deployed.
* @throws KeeperException
* @throws IOException
*/
@@ -170,6 +170,12 @@
// synchronized. The presumption is that in this case it is safe since this
// method is being played by a single thread on startup.
+ // TODO: Check list of user regions and their assignments against regionservers.
+ // TODO: Regions that have a null location and are not in regionsInTransitions
+ // need to be handled.
+ // TODO: Regions that are on servers that are not in our online list need
+ // reassigning.
+
// Scan META to build list of existing regions, servers, and assignment
rebuildUserRegions();
// Pickup any disabled tables
@@ -183,45 +189,89 @@
}
LOG.info("Failed-over master needs to process " + nodes.size() +
" regions in transition");
- for (String regionName: nodes) {
- RegionTransitionData data = ZKAssign.getData(watcher, regionName);
- HRegionInfo regionInfo =
- MetaReader.getRegion(catalogTracker, data.getRegionName()).getFirst();
- String encodedName = regionInfo.getEncodedName();
- switch(data.getEventType()) {
- case RS_ZK_REGION_CLOSING:
- // Just insert region into RIT.
- // If this never updates the timeout will trigger new assignment
- regionsInTransition.put(encodedName,
- new RegionState(regionInfo, RegionState.State.CLOSING,
- data.getStamp()));
- break;
+ for (String encodedRegionName: nodes) {
+ processRegionInTransition(encodedRegionName, null);
+ }
+ }
- case RS_ZK_REGION_CLOSED:
- // Region is closed, insert into RIT and handle it
- regionsInTransition.put(encodedName,
- new RegionState(regionInfo, RegionState.State.CLOSED,
- data.getStamp()));
- new ClosedRegionHandler(master, this, data, regionInfo).process();
- break;
+ /**
+ * If region is up in zk in transition, then do fixup and block and wait until
+ * the region is assigned and out of transition. Used on startup for
+ * catalog regions.
+ * @param hri Region to look for.
+ * @return True if we processed a region in transition else false if region
+ * was not up in zk in transition.
+ * @throws InterruptedException
+ * @throws KeeperException
+ * @throws IOException
+ */
+ boolean processRegionInTransitionAndBlockUntilAssigned(final HRegionInfo hri)
+ throws InterruptedException, KeeperException, IOException {
+ boolean intransistion = processRegionInTransition(hri.getEncodedName(), hri);
+ if (!intransistion) return intransistion;
+ synchronized(this.regionsInTransition) {
+ while (!this.master.isStopped() &&
+ this.regionsInTransition.containsKey(hri.getEncodedName())) {
+ this.regionsInTransition.wait();
+ }
+ }
+ return intransistion;
+ }
- case RS_ZK_REGION_OPENING:
- // Just insert region into RIT
- // If this never updates the timeout will trigger new assignment
- regionsInTransition.put(encodedName,
- new RegionState(regionInfo, RegionState.State.OPENING,
- data.getStamp()));
- break;
+ /**
+ * Process failover of encodedRegionName. Look in zk for the region's unassigned node data.
+ * @param encodedRegionName Region to process failover for.
+ * @param regionInfo RegionInfo. If null we'll go get it from meta table.
+ * @return True if the region was found in transition in zk, else false.
+ * @throws KeeperException
+ * @throws IOException
+ */
+ boolean processRegionInTransition(final String encodedRegionName,
+ final HRegionInfo regionInfo)
+ throws KeeperException, IOException {
+ RegionTransitionData data = ZKAssign.getData(watcher, encodedRegionName);
+ if (data == null) return false;
+ HRegionInfo hri = (regionInfo != null)? regionInfo:
+ MetaReader.getRegion(catalogTracker, data.getRegionName()).getFirst();
+ processRegionsInTransition(data, hri);
+ return true;
+ }
- case RS_ZK_REGION_OPENED:
- // Region is opened, insert into RIT and handle it
- regionsInTransition.put(encodedName,
- new RegionState(regionInfo, RegionState.State.OPENING,
- data.getStamp()));
- new OpenedRegionHandler(master, this, data, regionInfo,
- serverManager.getServerInfo(data.getServerName())).process();
- break;
- }
+ void processRegionsInTransition(final RegionTransitionData data,
+ final HRegionInfo regionInfo)
+ throws KeeperException {
+ String encodedRegionName = regionInfo.getEncodedName();
+ LOG.info("Processing region " + regionInfo.getRegionNameAsString() +
+ " in state " + data.getEventType());
+ switch (data.getEventType()) {
+ case RS_ZK_REGION_CLOSING:
+ // Just insert region into RIT.
+ // If this never updates the timeout will trigger new assignment
+ regionsInTransition.put(encodedRegionName, new RegionState(
+ regionInfo, RegionState.State.CLOSING, data.getStamp()));
+ break;
+
+ case RS_ZK_REGION_CLOSED:
+ // Region is closed, insert into RIT and handle it
+ regionsInTransition.put(encodedRegionName, new RegionState(
+ regionInfo, RegionState.State.CLOSED, data.getStamp()));
+ new ClosedRegionHandler(master, this, data, regionInfo).process();
+ break;
+
+ case RS_ZK_REGION_OPENING:
+ // Just insert region into RIT
+ // If this never updates the timeout will trigger new assignment
+ regionsInTransition.put(encodedRegionName, new RegionState(
+ regionInfo, RegionState.State.OPENING, data.getStamp()));
+ break;
+
+ case RS_ZK_REGION_OPENED:
+ // Region is opened, insert into RIT and handle it
+ regionsInTransition.put(encodedRegionName, new RegionState(
+ regionInfo, RegionState.State.OPENING, data.getStamp()));
+ new OpenedRegionHandler(master, this, data, regionInfo,
+ serverManager.getServerInfo(data.getServerName())).process();
+ break;
}
}
@@ -752,11 +802,11 @@
private void rebuildUserRegions() throws IOException {
Map-ROOT- and .META. are assigned. If not,
+ * assign them.
+ * @throws InterruptedException
* @throws IOException
+ * @throws KeeperException
+ * @return Count of regions we assigned.
*/
- private static void clusterStarterInitializations(final MasterFileSystem mfs,
- final ServerManager sm, final CatalogTracker ct, final AssignmentManager am)
- throws IOException, InterruptedException, KeeperException {
- // Check filesystem has required basics
- mfs.initialize();
- // TODO: Should do this in background rather than block master startup
- // TODO: Do we want to do this before/while/after RSs check in?
- // It seems that this method looks at active RSs but happens
- // concurrently with when we expect them to be checking in
- mfs.splitLogAfterStartup(sm.getOnlineServers());
- // Clean out current state of unassigned
- am.cleanoutUnassigned();
- // assign the root region
- am.assignRoot();
- ct.waitForRoot();
- // assign the meta region
- am.assignMeta();
- ct.waitForMeta();
- // above check waits for general meta availability but this does not
+ int assignRootAndMeta()
+ throws InterruptedException, IOException, KeeperException {
+ int assigned = 0;
+ long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
+
+ // Work on ROOT region. Is it in zk in transition?
+ boolean rit = this.assignmentManager.
+ processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
+ if (!catalogTracker.verifyRootRegionLocation(timeout)) {
+ this.assignmentManager.assignRoot();
+ this.catalogTracker.waitForRoot();
+ assigned++;
+ }
+ LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit);
+
+ // Work on meta region
+ rit = this.assignmentManager.
+ processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
+ if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {
+ this.assignmentManager.assignMeta();
+ this.catalogTracker.waitForMeta();
+ // Above check waits for general meta availability but this does not
// guarantee that the transition has completed
- am.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
- am.assignAllUserRegions();
+ this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
+ assigned++;
+ }
+ LOG.info(".META. assigned=" + assigned + ", rit=" + rit);
+ return assigned;
}
/*
Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1002348)
+++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1002880)
@@ -36,11 +36,16 @@
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.master.DeadServer;
import org.apache.hadoop.hbase.master.MasterServices;
+import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.zookeeper.KeeperException;
-
+/**
+ * Process server shutdown.
+ * Server-to-handle must be already in the deadservers lists. See
+ * {@link ServerManager#expireServer(HServerInfo)}.
+ */
public class ServerShutdownHandler extends EventHandler {
private static final Log LOG = LogFactory.getLog(ServerShutdownHandler.class);
private final HServerInfo hsi;
@@ -55,8 +60,9 @@
this.server = server;
this.services = services;
this.deadServers = deadServers;
- // Add to dead servers.
- this.deadServers.add(hsi.getServerName());
+ if (!this.deadServers.contains(hsi.getServerName())) {
+ LOG.warn(hsi.getServerName() + " is NOT in deadservers; it should be!");
+ }
}
@Override
Index: src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java (revision 1002348)
+++ src/main/java/org/apache/hadoop/hbase/mapreduce/package-info.java (revision 1002880)
@@ -1,5 +1,5 @@
/*
- * Copyright 20010 The Apache Software Foundation
+ * Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Index: src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1002348)
+++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1002880)
@@ -20,7 +20,6 @@
package org.apache.hadoop.hbase.catalog;
import java.io.IOException;
-import java.lang.reflect.UndeclaredThrowableException;
import java.net.ConnectException;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -40,6 +39,7 @@
import org.apache.hadoop.hbase.zookeeper.MetaNodeTracker;
import org.apache.hadoop.hbase.zookeeper.RootRegionTracker;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.KeeperException;
/**
@@ -271,7 +271,9 @@
/**
* Gets the current location for .META. if available and waits
* for up to the specified timeout if not immediately available. Throws an
- * exception if timed out waiting.
+ * exception if timed out waiting. This method differs from {@link #waitForMeta()}
+ * in that it will go ahead and verify the location gotten from ZooKeeper by
+ * trying to use returned connection.
* @param timeout maximum time to wait for meta availability, in milliseconds
* @return location of meta
* @throws InterruptedException if interrupted while waiting
@@ -282,15 +284,15 @@
public HServerAddress waitForMeta(long timeout)
throws InterruptedException, IOException, NotAllMetaRegionsOnlineException {
long stop = System.currentTimeMillis() + timeout;
- synchronized(metaAvailable) {
- if(getMetaServerConnection(true) != null) {
+ synchronized (metaAvailable) {
+ if (getMetaServerConnection(true) != null) {
return metaLocation;
}
while(!metaAvailable.get() &&
(timeout == 0 || System.currentTimeMillis() < stop)) {
metaAvailable.wait(timeout);
}
- if(getMetaServerConnection(true) == null) {
+ if (getMetaServerConnection(true) == null) {
throw new NotAllMetaRegionsOnlineException(
"Timed out (" + timeout + "ms)");
}
@@ -336,7 +338,6 @@
}
private void setMetaLocation(HServerAddress metaLocation) {
- LOG.info("Found new META location, " + metaLocation);
metaAvailable.set(true);
this.metaLocation = metaLocation;
// no synchronization because these are private and already under lock
@@ -359,24 +360,70 @@
}
private boolean verifyRegionLocation(HRegionInterface metaServer,
- byte [] regionName) {
+ byte [] regionName)
+ throws IOException {
+ if (metaServer == null) {
+ LOG.info("Passed metaserver is null");
+ return false;
+ }
Throwable t = null;
try {
+ // Am expecting only two possible exceptions here; unable
+ // to connect to the regionserver or NotServingRegionException wrapped
+ // in the hadoop rpc RemoteException.
return metaServer.getRegionInfo(regionName) != null;
- } catch (NotServingRegionException e) {
+ } catch (ConnectException e) {
t = e;
- } catch (UndeclaredThrowableException e) {
- // We can get a ConnectException wrapped by a UTE if client fails connect
- // If not a ConnectException, rethrow.
- if (!(e.getCause() instanceof ConnectException)) throw e;
- t = e.getCause();
+ } catch (RemoteException e) {
+ IOException ioe = e.unwrapRemoteException();
+ if (ioe instanceof NotServingRegionException) {
+ t = ioe;
+ } else {
+ throw e;
+ }
}
LOG.info("Failed verification of " + Bytes.toString(regionName) +
- ": " + t.getMessage());
+ ", assigning anew: " + t);
return false;
}
/**
+ * Verify -ROOT- is deployed and accessible.
+ * @param timeout How long to wait on zk for root address (passed through to
+ * the internal call to {@link #waitForRootServerConnection(long)}.
+ * @return True if the -ROOT- location is healthy.
+ * @throws IOException
+ * @throws InterruptedException
+ */
+ public boolean verifyRootRegionLocation(final long timeout)
+ throws InterruptedException, IOException {
+ HRegionInterface connection = null;
+ try {
+ connection = waitForRootServerConnection(timeout);
+ } catch (NotAllMetaRegionsOnlineException e) {
+ // Pass
+ } catch (IOException e) {
+ // Unexpected exception
+ throw e;
+ }
+ return (connection == null)? false:
+ verifyRegionLocation(connection, HRegionInfo.ROOT_REGIONINFO.getRegionName());
+ }
+
+ /**
+ * Verify .META. is deployed and accessible.
+ * @param timeout How long to wait on zk for .META. address
+ * (passed through to the internal call to {@link #waitForMetaServerConnection(long)}.
+ * @return True if the .META. location is healthy.
+ * @throws IOException Some unexpected IOE.
+ * @throws InterruptedException
+ */
+ public boolean verifyMetaRegionLocation(final long timeout)
+ throws InterruptedException, IOException {
+ return getMetaServerConnection(true) != null;
+ }
+
+ /**
* Check if hsi was carrying -ROOT- or
* .META. and if so, clear out old locations.
* @param hsi Server that has crashed/shutdown.
Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 1002348)
+++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 1002880)
@@ -20,6 +20,7 @@
package org.apache.hadoop.hbase.ipc;
import java.io.IOException;
+import java.net.ConnectException;
import java.util.List;
import java.util.NavigableSet;
@@ -38,6 +39,7 @@
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import org.apache.hadoop.ipc.RemoteException;
/**
* Clients interact with HRegionServers using a handle to the HRegionInterface.
@@ -51,10 +53,12 @@
*
* @param regionName name of the region
* @return HRegionInfo object for region
- * @throws NotServingRegionException e
+ * @throws NotServingRegionException
+ * @throws ConnectException
+ * @throws IOException This can manifest as an Hadoop ipc {@link RemoteException}
*/
public HRegionInfo getRegionInfo(final byte [] regionName)
- throws NotServingRegionException;
+ throws NotServingRegionException, ConnectException, IOException;
/**
* Return all the data for the row that matches row exactly,
Index: src/main/resources/hbase-default.xml
===================================================================
--- src/main/resources/hbase-default.xml (revision 1002348)
+++ src/main/resources/hbase-default.xml (revision 1002880)
@@ -157,7 +157,7 @@