diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
index fbd2d67..9aa812f 100644
--- a/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
+++ b/src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java
@@ -268,6 +268,21 @@ public class CatalogTracker {
}
/**
+ * Method used by master on startup trying to figure out the state of the cluster.
+ * Returns the current meta location unless it is null. In this latter case,
+ * it has not yet been set, so go check what is up in -ROOT- and
+ * return that.
+ * @return {@link ServerName} for server hosting .META. or, if that is null,
+ * the location read from the -ROOT- table (which
+ * could be null or just plain stale).
+ * @throws IOException if thrown while reading the location from -ROOT-
+ */
+ public ServerName getMetaLocationOrReadLocationFromRoot() throws IOException {
+ ServerName sn = getMetaLocation();
+ return sn != null? sn: MetaReader.getMetaRegionLocation(this);
+ }
+
+ /**
* Waits indefinitely for availability of -ROOT-. Used during
* cluster startup.
* @throws InterruptedException if interrupted while waiting
@@ -336,7 +351,7 @@ public class CatalogTracker {
* @return connection to server hosting root
* @throws NotAllMetaRegionsOnlineException if timed out waiting
* @throws IOException
- * @deprecated Use {@link #getRootServerConnection(long)}
+ * @deprecated Use #getRootServerConnection(long)
*/
public HRegionInterface waitForRootServerConnectionDefault()
throws NotAllMetaRegionsOnlineException, IOException {
@@ -381,8 +396,7 @@ public class CatalogTracker {
// Now read the current .META. content from -ROOT-. Note: This goes via
// an HConnection. It has its own way of figuring root and meta locations
// which we have to wait on.
- ServerName newLocation =
- MetaReader.readRegionLocation(this, META_REGION_NAME);
+ ServerName newLocation = MetaReader.getMetaRegionLocation(this);
if (newLocation == null) return null;
HRegionInterface newConnection = getCachedConnection(newLocation);
diff --git a/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java b/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
index c7c918c..e5e60a8 100644
--- a/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
+++ b/src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java
@@ -193,6 +193,8 @@ public class MetaReader {
throws IOException {
// Passing the CatalogTracker's connection configuration ensures this
// HTable instance uses the CatalogTracker's connection.
+ org.apache.hadoop.hbase.client.HConnection c = catalogTracker.getConnection();
+ if (c == null) throw new NullPointerException("No connection");
return new HTable(catalogTracker.getConnection().getConfiguration(), tableName);
}
@@ -251,7 +253,7 @@ public class MetaReader {
* @param metaServer connection to server hosting ROOT
* @return location of META in ROOT where location, or null if not available
* @throws IOException
- * @deprecated Does not retry; use {@link #readRegionLocation(CatalogTracker, byte[])}
+ * @deprecated Does not retry; use #getMetaRegionLocation(CatalogTracker)
*/
public static ServerName readMetaLocation(HRegionInterface metaServer)
throws IOException {
@@ -260,13 +262,26 @@ public class MetaReader {
}
/**
+ * Gets the location of the .META. region by reading the content of
+ * -ROOT-. Delegates to readRegionLocation with CatalogTracker.META_REGION_NAME.
+ * @param ct catalog tracker handed through to the region-location lookup
+ * @return location of .META. region as a {@link ServerName} or
+ * null if not found
+ * @throws IOException if the underlying region-location lookup throws
+ */
+ static ServerName getMetaRegionLocation(final CatalogTracker ct)
+ throws IOException {
+ return MetaReader.readRegionLocation(ct, CatalogTracker.META_REGION_NAME);
+ }
+
+ /**
* Reads the location of the specified region
* @param catalogTracker
* @param regionName region whose location we are after
* @return location of region as a {@link ServerName} or null if not found
* @throws IOException
*/
- public static ServerName readRegionLocation(CatalogTracker catalogTracker,
+ static ServerName readRegionLocation(CatalogTracker catalogTracker,
byte [] regionName)
throws IOException {
Pair pair = getRegion(catalogTracker, regionName);
diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 7348007..19ed5b8 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -462,7 +462,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
for (ServerName sn: this.regionServerTracker.getOnlineServers()) {
if (!this.serverManager.isServerOnline(sn)) {
// Not registered; add it.
- LOG.info("Registering server found up in zk: " + sn);
+ LOG.info("Registering server found up in zk but who has not yet " +
+ "reported in: " + sn);
this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD);
}
}
@@ -526,14 +527,23 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
status.setStatus("Assigning ROOT region");
boolean rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
+ ServerName expiredServer = null;
if (!catalogTracker.verifyRootRegionLocation(timeout)) {
- this.assignmentManager.assignRoot();
+ ServerName currentRootServer = this.catalogTracker.getRootLocation();
+ if (expireIfOnline(currentRootServer)) {
+ // We are expiring this server. The processing of expiration will assign
+ // root so don't do it here.
+ expiredServer = currentRootServer;
+ } else {
+ // Root was not on an online server when we failed verification
+ this.assignmentManager.assignRoot();
+ }
this.catalogTracker.waitForRoot();
//This guarantees that the transition has completed
this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
assigned++;
} else {
- // Region already assigned. We didnt' assign it. Add to in-memory state.
+ // Region already assigned. We didn't assign it. Add to in-memory state.
this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO,
this.catalogTracker.getRootLocation());
}
@@ -545,7 +555,15 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO);
if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {
- this.assignmentManager.assignMeta();
+ ServerName currentMetaServer =
+ this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
+ if (currentMetaServer != null && currentMetaServer.equals(expiredServer)) {
+ // We are expiring the server that is carrying meta already.
+ // The expiration processing will take care of reassigning meta.
+ expireIfOnline(currentMetaServer);
+ } else {
+ this.assignmentManager.assignMeta();
+ }
this.catalogTracker.waitForMeta();
// Above check waits for general meta availability but this does not
// guarantee that the transition has completed
@@ -562,6 +580,19 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
return assigned;
}
+ /**
+ * Expire a server if the server manager reports it as currently online.
+ * @param sn ServerName to check; null is tolerated and yields false.
+ * @return True if server was online and so we expired it; false otherwise.
+ */
+ private boolean expireIfOnline(final ServerName sn) {
+ if (sn == null) return false;
+ if (!this.serverManager.isServerOnline(sn)) return false;
+ LOG.info("Forcing expiration of " + sn);
+ this.serverManager.expireServer(sn);
+ return true;
+ }
+
@Override
public ProtocolSignature getProtocolSignature(
String protocol, long version, int clientMethodsHashCode)