Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 11031) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -617,14 +617,31 @@ public int waitForRegionServers() throws InterruptedException { long interval = this.master.getConfiguration(). - getLong("hbase.master.wait.on.regionservers.interval", 3000); + getLong("hbase.master.wait.on.regionservers.interval", 1500); + long timeout = this.master.getConfiguration(). + getLong("hbase.master.wait.on.regionservers.timeout", 4500); + int minToStart = this.master.getConfiguration(). + getInt("hbase.master.wait.on.regionservers.mintostart", 1); + int maxToStart = this.master.getConfiguration(). + getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE); // So, number of regionservers > 0 and its been n since last check in, break, // else just stall here int count = 0; + long slept = 0; for (int oldcount = countOfRegionServers(); !this.master.isStopped();) { Thread.sleep(interval); + slept += interval; count = countOfRegionServers(); - if (count == oldcount && count > 0) break; + if (count == oldcount && count >= minToStart && slept >= timeout) { + LOG.info("Finished waiting for regionserver count to settle; " + + "count=" + count + ", sleptFor=" + slept); + break; + } + if (count >= maxToStart) { + LOG.info("At least the max configured number of regionserver(s) have " + + "checked in: " + count); + break; + } if (count == 0) { LOG.info("Waiting on regionserver(s) to checkin"); } else { Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (revision 11031) +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (working copy) @@ -69,8 +69,13 @@ final int NUM_MASTERS = 3; final int NUM_RS = 3; + // Create config to use for this cluster + Configuration conf = HBaseConfiguration.create(); + conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); + conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); + // Start the cluster - HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); @@ -221,6 +226,8 @@ // Need to drop the timeout much lower conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); + conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); + conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); // Start the cluster HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); @@ -525,6 +532,8 @@ // Need to drop the timeout much lower conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); + conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1); + conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2); // Create and start the cluster HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);