Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1399860) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -87,6 +87,18 @@ */ @InterfaceAudience.Private public class ServerManager { + public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART = + "hbase.master.wait.on.regionservers.maxtostart"; + + public static final String WAIT_ON_REGIONSERVERS_MINTOSTART = + "hbase.master.wait.on.regionservers.mintostart"; + + public static final String WAIT_ON_REGIONSERVERS_TIMEOUT = + "hbase.master.wait.on.regionservers.timeout"; + + public static final String WAIT_ON_REGIONSERVERS_INTERVAL = + "hbase.master.wait.on.regionservers.interval"; + private static final Log LOG = LogFactory.getLog(ServerManager.class); // Set if we are to shutdown the cluster. @@ -674,25 +686,38 @@ * Wait for the region servers to report in. * We will wait until one of this condition is met: * - the master is stopped - * - the 'hbase.master.wait.on.regionservers.timeout' is reached * - the 'hbase.master.wait.on.regionservers.maxtostart' number of * region servers is reached * - the 'hbase.master.wait.on.regionservers.mintostart' is reached AND * there have been no new region server in for - * 'hbase.master.wait.on.regionservers.interval' time + * 'hbase.master.wait.on.regionservers.interval' time AND + * the 'hbase.master.wait.on.regionservers.timeout' is reached * * @throws InterruptedException */ public void waitForRegionServers(MonitoredTask status) throws InterruptedException { final long interval = this.master.getConfiguration(). - getLong("hbase.master.wait.on.regionservers.interval", 1500); + getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500); final long timeout = this.master.getConfiguration(). 
- getLong("hbase.master.wait.on.regionservers.timeout", 4500); - final int minToStart = this.master.getConfiguration(). - getInt("hbase.master.wait.on.regionservers.mintostart", 1); - final int maxToStart = this.master.getConfiguration(). - getInt("hbase.master.wait.on.regionservers.maxtostart", Integer.MAX_VALUE); + getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500); + int minToStart = this.master.getConfiguration(). + getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, 1); + if (minToStart < 1) { + LOG.warn(String.format( + "The value of '%s' (%d) can not be less than 1, ignoring.", + WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart)); + minToStart = 1; + } + int maxToStart = this.master.getConfiguration(). + getInt(WAIT_ON_REGIONSERVERS_MAXTOSTART, Integer.MAX_VALUE); + if (maxToStart < minToStart) { + LOG.warn(String.format( + "The value of '%s' (%d) is set less than '%s' (%d), ignoring.", + WAIT_ON_REGIONSERVERS_MAXTOSTART, maxToStart, + WAIT_ON_REGIONSERVERS_MINTOSTART, minToStart)); + maxToStart = Integer.MAX_VALUE; + } long now = System.currentTimeMillis(); final long startTime = now; @@ -703,9 +728,8 @@ int oldCount = 0; while ( !this.master.isStopped() && - slept < timeout && count < maxToStart && - (lastCountChange+interval > now || count < minToStart) + (lastCountChange+interval > now || timeout > slept || count < minToStart) ){ // Log some info at every interval time or if there is a change Index: hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (revision 1399860) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (working copy) @@ -69,6 +69,7 @@ import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.ServerManager; 
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.HStore; @@ -79,6 +80,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; import org.apache.hadoop.hbase.util.RegionSplitter; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.zookeeper.EmptyWatcher; @@ -730,9 +732,13 @@ createRootDir(); // These settings will make the server waits until this exact number of - // regions servers are connected. - conf.setInt("hbase.master.wait.on.regionservers.mintostart", numSlaves); - conf.setInt("hbase.master.wait.on.regionservers.maxtostart", numSlaves); + // region servers are connected. + if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1) == -1) { + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, numSlaves); + } + if (conf.getInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, -1) == -1) { + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, numSlaves); + } Configuration c = new Configuration(this.conf); this.hbaseCluster = @@ -816,6 +822,9 @@ zooKeeperWatcher = null; } + // unset the configuration for MIN and MAX RS to start + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, -1); if (this.hbaseCluster != null) { this.hbaseCluster.shutdown(); // Wait till hbase is down before going on to shutdown zk. 
@@ -1542,10 +1551,29 @@ public void expireRegionServerSession(int index) throws Exception { HRegionServer rs = getMiniHBaseCluster().getRegionServer(index); expireSession(rs.getZooKeeper(), false); + decrementMinRegionServerCount(); } + private void decrementMinRegionServerCount() { + // decrement the count for this.conf, for newly spawned master + // this.hbaseCluster shares this configuration too + decrementMinRegionServerCount(getConfiguration()); + // each master thread keeps a copy of configuration + for (MasterThread master : getHBaseCluster().getMasterThreads()) { + decrementMinRegionServerCount(master.getMaster().getConfiguration()); + } + } + private void decrementMinRegionServerCount(Configuration conf) { + int currentCount = conf.getInt( + ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, -1); + if (currentCount != -1) { + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, + Math.max(currentCount - 1, 1)); + } + } + public void expireSession(ZooKeeperWatcher nodeZK) throws Exception { expireSession(nodeZK, false); } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (revision 1399860) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (working copy) @@ -157,8 +157,8 @@ // Need to drop the timeout much lower conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); - conf.setInt("hbase.master.wait.on.regionservers.mintostart", 3); - conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 3); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 3); // Start the cluster HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); @@ -460,8 +460,8 @@ // Need to drop the 
timeout much lower conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000); conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000); - conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1); - conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 2); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2); TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); log("Cluster started"); Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java (revision 1399860) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java (working copy) @@ -247,8 +247,8 @@ public void testCatalogDeploys() throws IOException, KeeperException, InterruptedException, DeserializationException, ServiceException { final Configuration conf = TESTUTIL.getConfiguration(); - conf.setInt("hbase.master.wait.on.regionservers.mintostart", 1); - conf.setInt("hbase.master.wait.on.regionservers.maxtostart", 1); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 1); final long now = System.currentTimeMillis(); // Name for our single mocked up regionserver. 
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 1399860) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (working copy) @@ -69,8 +69,11 @@ @BeforeClass public static void setUpBeforeClass() throws Exception { // Set it so that this test runs with my custom master - TESTUTIL.getConfiguration().setClass(HConstants.MASTER_IMPL, - TestingMaster.class, HMaster.class); + Configuration conf = TESTUTIL.getConfiguration(); + conf.setClass(HConstants.MASTER_IMPL, TestingMaster.class, HMaster.class); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 3); + conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 4); + // Start up the cluster. TESTUTIL.startMiniCluster(NUM_MASTERS, NUM_RS); } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java (revision 1399860) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/TestZooKeeper.java (working copy) @@ -444,7 +444,7 @@ * Tests whether the logs are split when master recovers from a expired zookeeper session and an * RS goes down. */ - @Test(timeout = 180000) + @Test(timeout = 240000) public void testLogSplittingAfterMasterRecoveryDueToZKExpiry() throws IOException, KeeperException, InterruptedException { MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();