diff --git src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java index d51efb9..39c5a88 100644 --- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java +++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java @@ -268,6 +268,21 @@ public class CatalogTracker { } /** + * Method used by master on startup trying to figure out the state of the cluster. + * Returns the current meta location unless it is null. In that case, + * it has not yet been set, so go check what is up in -ROOT- and + * return that. + * @return {@link ServerName} for server hosting .META. or, if null, + * we'll read the location that is up in -ROOT- table (which + * could be null or just plain stale). + * @throws IOException + */ + public ServerName getMetaLocationOrReadLocationFromRoot() throws IOException { + ServerName sn = getMetaLocation(); + return sn != null? sn: MetaReader.getMetaRegionLocation(this); + } + + /** * Waits indefinitely for availability of -ROOT-. Used during * cluster startup. 
* @throws InterruptedException if interrupted while waiting @@ -306,7 +321,7 @@ public class CatalogTracker { * @throws InterruptedException * @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws IOException - * @deprecated Use {@link #getRootServerConnection(long)} + * @deprecated Use #getRootServerConnection(long) */ public HRegionInterface waitForRootServerConnection(long timeout) throws InterruptedException, NotAllMetaRegionsOnlineException, IOException { @@ -336,7 +351,7 @@ public class CatalogTracker { * @return connection to server hosting root * @throws NotAllMetaRegionsOnlineException if timed out waiting * @throws IOException - * @deprecated Use {@link #getRootServerConnection(long)} + * @deprecated Use #getRootServerConnection(long) */ public HRegionInterface waitForRootServerConnectionDefault() throws NotAllMetaRegionsOnlineException, IOException { @@ -381,8 +396,7 @@ public class CatalogTracker { // Now read the current .META. content from -ROOT-. Note: This goes via // an HConnection. It has its own way of figuring root and meta locations // which we have to wait on. - ServerName newLocation = - MetaReader.readRegionLocation(this, META_REGION_NAME); + ServerName newLocation = MetaReader.getMetaRegionLocation(this); if (newLocation == null) { LOG.debug(".META. server unavailable."); return null; diff --git src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java index c7c918c..e5e60a8 100644 --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java @@ -193,6 +193,8 @@ public class MetaReader { throws IOException { // Passing the CatalogTracker's connection configuration ensures this // HTable instance uses the CatalogTracker's connection. 
+ org.apache.hadoop.hbase.client.HConnection c = catalogTracker.getConnection(); + if (c == null) throw new NullPointerException("No connection"); return new HTable(catalogTracker.getConnection().getConfiguration(), tableName); } @@ -251,7 +253,7 @@ public class MetaReader { * @param metaServer connection to server hosting ROOT * @return location of META in ROOT where location, or null if not available * @throws IOException - * @deprecated Does not retry; use {@link #readRegionLocation(CatalogTracker, byte[])} + * @deprecated Does not retry; use #getMetaRegionLocation(CatalogTracker) */ public static ServerName readMetaLocation(HRegionInterface metaServer) throws IOException { @@ -260,13 +262,26 @@ public class MetaReader { } /** + * Gets the location of the .META. region by reading the content of + * -ROOT-. + * @param ct catalog tracker passed on to readRegionLocation + * @return location of .META. region as a {@link ServerName} or + * null if not found + * @throws IOException + */ + static ServerName getMetaRegionLocation(final CatalogTracker ct) + throws IOException { + return MetaReader.readRegionLocation(ct, CatalogTracker.META_REGION_NAME); + } + + /** * Reads the location of the specified region * @param catalogTracker * @param regionName region whose location we are after * @return location of region as a {@link ServerName} or null if not found * @throws IOException */ - public static ServerName readRegionLocation(CatalogTracker catalogTracker, + static ServerName readRegionLocation(CatalogTracker catalogTracker, byte [] regionName) throws IOException { Pair pair = getRegion(catalogTracker, regionName); diff --git src/main/java/org/apache/hadoop/hbase/master/HMaster.java src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 74ee0be..d64f6dd 100644 --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -462,7 +462,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { for (ServerName sn: 
this.regionServerTracker.getOnlineServers()) { if (!this.serverManager.isServerOnline(sn)) { // Not registered; add it. - LOG.info("Registering server found up in zk: " + sn); + LOG.info("Registering server found up in zk but who has not yet " + + "reported in: " + sn); this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD); } } @@ -526,14 +527,23 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { status.setStatus("Assigning ROOT region"); boolean rit = this.assignmentManager. processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO); + ServerName expiredServer = null; if (!catalogTracker.verifyRootRegionLocation(timeout)) { - this.assignmentManager.assignRoot(); + ServerName currentRootServer = this.catalogTracker.getRootLocation(); + if (expireIfOnline(currentRootServer)) { + // We are expiring this server. The processing of expiration will assign + // root so don't do it here. + expiredServer = currentRootServer; + } else { + // Root location is null or not on an online server; assign root ourselves + this.assignmentManager.assignRoot(); + } this.catalogTracker.waitForRoot(); //This guarantees that the transition has completed this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO); assigned++; } else { - // Region already assigned. We didnt' assign it. Add to in-memory state. + // Region already assigned. We didn't assign it. Add to in-memory state. this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO, this.catalogTracker.getRootLocation()); } @@ -545,7 +555,15 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { rit = this.assignmentManager. 
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO); if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) { - this.assignmentManager.assignMeta(); + ServerName currentMetaServer = + this.catalogTracker.getMetaLocationOrReadLocationFromRoot(); + if (isExpiring(expiredServer, currentMetaServer) || + expireIfOnline(currentMetaServer)) { + // We are expiring the server that is carrying meta because it is unreachable. + // The expiration processing will take care of reassigning meta. + } else { + this.assignmentManager.assignMeta(); + } this.catalogTracker.waitForMeta(); // Above check waits for general meta availability but this does not // guarantee that the transition has completed @@ -562,6 +580,28 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { return assigned; } + /** + * @param expiring server already being expired; may be null + * @param sn server to compare against it; may be null + * @return True if both passed servers are non-null and equal + */ + private boolean isExpiring(final ServerName expiring, final ServerName sn) { + return expiring != null && sn != null && sn.equals(expiring); + } + + /** + * Expire a server if we find it in the set of online servers. + * @param sn ServerName to check; null yields false. + * @return True if server was online and so we expired it as unreachable. + */ + private boolean expireIfOnline(final ServerName sn) { + if (sn == null) return false; + if (!this.serverManager.isServerOnline(sn)) return false; + LOG.info("Forcing expiration of " + sn); + this.serverManager.expireServer(sn); + return true; + } + @Override public ProtocolSignature getProtocolSignature( String protocol, long version, int clientMethodsHashCode)