diff --git src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java index c7c918c..1c44335 100644 --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java @@ -193,6 +193,8 @@ public class MetaReader { throws IOException { // Passing the CatalogTracker's connection configuration ensures this // HTable instance uses the CatalogTracker's connection. + org.apache.hadoop.hbase.client.HConnection c = catalogTracker.getConnection(); + if (c == null) throw new NullPointerException("No connection"); return new HTable(catalogTracker.getConnection().getConfiguration(), tableName); } diff --git src/main/java/org/apache/hadoop/hbase/master/HMaster.java src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 74ee0be..5193f5f 100644 --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -462,7 +462,8 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { for (ServerName sn: this.regionServerTracker.getOnlineServers()) { if (!this.serverManager.isServerOnline(sn)) { // Not registered; add it. - LOG.info("Registering server found up in zk: " + sn); + LOG.info("Registering server found up in zk but who has not yet " + + "reported in: " + sn); this.serverManager.recordNewServer(sn, HServerLoad.EMPTY_HSERVERLOAD); } } @@ -526,14 +527,23 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { status.setStatus("Assigning ROOT region"); boolean rit = this.assignmentManager. 
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO); + ServerName expiredServer = null; if (!catalogTracker.verifyRootRegionLocation(timeout)) { - this.assignmentManager.assignRoot(); + ServerName currentRootServer = this.catalogTracker.getRootLocation(); + if (expireIfOnline(currentRootServer)) { + // We're expiring this server. The processing of server expiration will + // assign root. + expiredServer = currentRootServer; + } else { + // Root was not on an online server when we failed verification + this.assignmentManager.assignRoot(); + } this.catalogTracker.waitForRoot(); //This guarantees that the transition has completed this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO); assigned++; } else { - // Region already assigned. We didnt' assign it. Add to in-memory state. + // Region already assigned. We didn't assign it. Add to in-memory state. this.assignmentManager.regionOnline(HRegionInfo.ROOT_REGIONINFO, this.catalogTracker.getRootLocation()); } @@ -545,7 +555,14 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { rit = this.assignmentManager. processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO); if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) { - this.assignmentManager.assignMeta(); + ServerName currentMetaServer = this.catalogTracker.getMetaLocation(); + if ((currentMetaServer != null && currentMetaServer.equals(expiredServer)) || + expireIfOnline(currentMetaServer)) { + // We are expiring the server that is carrying meta because unreachable + // The expiration processing will take care of reassigning meta. 
+ } else { + this.assignmentManager.assignMeta(); + } this.catalogTracker.waitForMeta(); // Above check waits for general meta availability but this does not // guarantee that the transition has completed @@ -562,6 +579,18 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { return assigned; } + /** + * @param sn Server last known to carry root/meta; may be null. If online, it was unreachable. + * @return True if server was online and so we expired it as unreachable; false if sn is null or not online. + */ + private boolean expireIfOnline(final ServerName sn) { + if (sn == null || !this.serverManager.isServerOnline(sn)) return false; + LOG.info("Forcing expiration of " + sn + + " because seems unreachable and its carrying root/meta"); + this.serverManager.expireServer(sn); + return true; + } + @Override public ProtocolSignature getProtocolSignature( String protocol, long version, int clientMethodsHashCode) diff --git src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index 722c086..0e241ae 100644 --- src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -68,7 +68,7 @@ public class TestMasterFailover { * the cluster. * @throws Exception */ - @Test (timeout=240000) + @Test (timeout=180000) public void testSimpleMasterFailover() throws Exception { final int NUM_MASTERS = 3; @@ -136,7 +136,7 @@ public class TestMasterFailover { TEST_UTIL.shutdownMiniCluster(); } - @Test + @Test (timeout=180000) public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState() throws Exception { final int NUM_MASTERS = 1;