Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -26,22 +26,22 @@
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
import javax.management.ObjectName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -50,6 +50,7 @@
import org.apache.hadoop.hbase.HServerLoad;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableDescriptors;
@@ -415,8 +416,9 @@
this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
zooKeeper.registerListenerFirst(assignmentManager);
- this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
- this.serverManager);
+ this.regionServerTracker =
+ createRegionServerTracker(this.zooKeeper, this, this.serverManager);
+
this.regionServerTracker.start();
this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
@@ -442,6 +444,19 @@
", cluster-up flag was=" + wasUp);
}
+ /**
+ * Override to change master's RegionServerTracker creation. Used for testing.
+ *
+ * @param zkw ZooKeeper watcher
+ * @param a Abortable, typically the master itself
+ * @param sm the master's ServerManager
+ * @return Instance of RegionServerTracker
+ */
+ public RegionServerTracker createRegionServerTracker(final ZooKeeperWatcher zkw,
+ final Abortable a, final ServerManager sm) {
+ return new RegionServerTracker(zkw, a, sm);
+ }
+
// Check if we should stop every second.
private Sleeper stopSleeper = new Sleeper(1000, this);
private void loop() {
@@ -522,8 +537,7 @@
// TODO: Should do this in background rather than block master startup
status.setStatus("Splitting logs after master startup");
- this.fileSystemManager.
- splitLogAfterStartup(this.serverManager.getOnlineServers().keySet());
+ splitLogAfterStartup(this.fileSystemManager, this.serverManager);
// Make sure root and meta assigned before proceeding.
assignRootAndMeta(status);
@@ -569,9 +583,22 @@
LOG.error("Coprocessor postStartMaster() hook failed", ioe);
}
}
+
+ this.serverManager.expireDelayedServers();
}
/**
+ * Override to change master's splitLogAfterStartup. Used for testing.
+ *
+ * @param mfs the master's MasterFileSystem
+ * @param sm the master's ServerManager
+ */
+ public void splitLogAfterStartup(final MasterFileSystem mfs,
+ final ServerManager sm) {
+ mfs.splitLogAfterStartup(sm.getOnlineServers().keySet());
+ }
+
+ /**
* Check -ROOT- and .META. are assigned. If not,
* assign them.
* @throws InterruptedException
@@ -588,17 +615,11 @@
status.setStatus("Assigning ROOT region");
boolean rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
- ServerName expiredServer = null;
+ ServerName currentRootServer = null;
if (!catalogTracker.verifyRootRegionLocation(timeout)) {
- ServerName currentRootServer = this.catalogTracker.getRootLocation();
- if (expireIfOnline(currentRootServer)) {
- // We are expiring this server. The processing of expiration will assign
- // root so don't do it here.
- expiredServer = currentRootServer;
- } else {
- // Root was not on an online server when we failed verification
- this.assignmentManager.assignRoot();
- }
+ currentRootServer = this.catalogTracker.getRootLocation();
+ splitLogIfOnline(currentRootServer);
+ this.assignmentManager.assignRoot();
this.catalogTracker.waitForRoot();
//This guarantees that the transition has completed
this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
@@ -618,13 +639,11 @@
if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {
ServerName currentMetaServer =
this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
- if (currentMetaServer != null && currentMetaServer.equals(expiredServer)) {
- // We are expiring the server that is carrying meta already.
- // The expiration processing will take care of reassigning meta.
- expireIfOnline(currentMetaServer);
- } else {
- this.assignmentManager.assignMeta();
+ if (currentMetaServer != null
+ && !currentMetaServer.equals(currentRootServer)) {
+ splitLogIfOnline(currentMetaServer);
}
+ assignmentManager.assignMeta();
this.catalogTracker.waitForMeta();
// Above check waits for general meta availability but this does not
// guarantee that the transition has completed
@@ -675,16 +694,17 @@
}
/**
- * Expire a server if we find it is one of the online servers set.
+ * Split a server's log if we find it is one of the online servers set.
+ *
* @param sn ServerName to check.
- * @return True if server was online and so we expired it as unreachable.
+ * @throws IOException
*/
- private boolean expireIfOnline(final ServerName sn) {
- if (sn == null) return false;
- if (!this.serverManager.isServerOnline(sn)) return false;
- LOG.info("Forcing expiration of " + sn);
- this.serverManager.expireServer(sn);
- return true;
+ private void splitLogIfOnline(final ServerName sn) throws IOException {
+ if (sn == null || !this.serverManager.isServerOnline(sn)) {
+ return;
+ }
+ LOG.info("Forcing split log of " + sn);
+ this.fileSystemManager.splitLog(sn);
}
@Override
@@ -1178,6 +1198,9 @@
@Override
public void deleteTable(final byte [] tableName) throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
cpHost.preDeleteTable(tableName);
}
@@ -1240,6 +1263,9 @@
public void addColumn(byte [] tableName, HColumnDescriptor column)
throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
if (cpHost.preAddColumn(tableName, column)) {
return;
@@ -1254,6 +1280,9 @@
public void modifyColumn(byte [] tableName, HColumnDescriptor descriptor)
throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
if (cpHost.preModifyColumn(tableName, descriptor)) {
return;
@@ -1268,6 +1297,9 @@
public void deleteColumn(final byte [] tableName, final byte [] c)
throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
if (cpHost.preDeleteColumn(tableName, c)) {
return;
@@ -1281,6 +1313,9 @@
}
public void enableTable(final byte [] tableName) throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
cpHost.preEnableTable(tableName);
}
@@ -1293,6 +1328,9 @@
}
public void disableTable(final byte [] tableName) throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
cpHost.preDisableTable(tableName);
}
@@ -1342,6 +1380,9 @@
@Override
public void modifyTable(final byte[] tableName, HTableDescriptor htd)
throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
if (cpHost != null) {
cpHost.preModifyTable(tableName, htd);
}
@@ -1648,6 +1689,9 @@
return this.abort;
}
+ public boolean isInitializing() {
+ return !this.initialized;
+ }
/**
* Report whether this master is currently the active master or not.
@@ -1683,6 +1727,9 @@
@Override
public void assign(final byte [] regionName)throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
Pair pair =
MetaReader.getRegion(this.catalogTracker, regionName);
if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
@@ -1706,6 +1753,9 @@
@Override
public void unassign(final byte [] regionName, final boolean force)
throws IOException {
+ if (isInitializing()) {
+ throw new PleaseHoldException("Master is initializing");
+ }
Pair pair =
MetaReader.getRegion(this.catalogTracker, regionName);
if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -771,7 +771,7 @@
// Interrupt catalog tracker here in case any regions being opened out in
// handlers are stuck waiting on meta or root.
if (this.catalogTracker != null) this.catalogTracker.stop();
- if (this.fsOk) {
+ if (!this.killed && this.fsOk) {
waitOnAllRegionsToClose(abortRequested);
LOG.info("stopping server " + this.serverNameFromMasterPOV +
"; all regions closed.");
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy)
@@ -1768,6 +1768,7 @@
final List servers = this.serverManager.getOnlineServersList();
final List drainingServers = this.serverManager.getDrainingServersList();
+
if (serverToExclude != null) servers.remove(serverToExclude);
// Loop through the draining server list and remove them from the server
@@ -1780,6 +1781,11 @@
}
}
+ // Remove the deadNotExpired servers from the server list.
+ removeDeadNotExpiredServers(servers);
+
+
+
if (servers.isEmpty()) return null;
RegionPlan randomPlan = new RegionPlan(state.getRegion(), null,
@@ -1811,7 +1817,7 @@
" so generated a random one; " + randomPlan + "; " +
serverManager.countOfRegionServers() +
" (online=" + serverManager.getOnlineServers().size() +
- ", exclude=" + drainingServers.size() + ") available servers");
+ ", available=" + servers.size() + ") available servers");
return randomPlan;
}
LOG.debug("Using pre-existing plan for region " +
@@ -1820,6 +1826,23 @@
}
/**
+ * Loop through the deadNotExpired server list and remove them from the
+ * servers.
+ * @param servers
+ */
+ public void removeDeadNotExpiredServers(List servers) {
+ Set deadNotExpiredServers = this.serverManager
+ .getDeadNotExpiredServers();
+ if (!deadNotExpiredServers.isEmpty()) {
+ for (ServerName server : deadNotExpiredServers) {
+ LOG.debug("Removing dead but not expired server: " + server
+ + " from eligible server pool.");
+ servers.remove(server);
+ }
+ }
+ }
+
+ /**
* Unassign the list of regions. Configuration knobs:
* hbase.bulk.waitbetween.reopen indicates the number of milliseconds to
* wait before unassigning another region from this region server
@@ -2132,6 +2155,7 @@
throws IOException,
InterruptedException {
List servers = this.serverManager.getOnlineServersList();
+ removeDeadNotExpiredServers(servers);
assignUserRegions(regions, servers);
}
@@ -2171,6 +2195,9 @@
// Get all available servers
List servers = serverManager.getOnlineServersList();
+ // Remove the deadNotExpired servers from the server list.
+ removeDeadNotExpiredServers(servers);
+
// If there are no servers we need not proceed with region assignment.
if(servers.isEmpty()) return;
Index: src/main/java/org/apache/hadoop/hbase/PleaseHoldException.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/PleaseHoldException.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/PleaseHoldException.java (working copy)
@@ -22,9 +22,10 @@
import java.io.IOException;
/**
- * This exception is thrown by the master when a region server was shut down
- * and restarted so fast that the master still hasn't processed the server
- * shutdown of the first instance.
+ * This exception is thrown by the master when a region server was shut down and
+ * restarted so fast that the master still hasn't processed the server shutdown
+ * of the first instance, or when the master is initializing and a client
+ * calls admin operations.
*/
@SuppressWarnings("serial")
public class PleaseHoldException extends IOException {
Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 0)
@@ -0,0 +1,257 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.master.TestMasterFailover;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestRSKilledWhenMasterInitializing {
+ private static final Log LOG = LogFactory.getLog(TestRSKilledWhenMasterInitializing.class);
+
+ private static final HBaseTestingUtility TESTUTIL = new HBaseTestingUtility();
+ private static final int NUM_MASTERS = 1;
+ private static final int NUM_RS = 4;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // Set it so that this test runs with my custom master
+ TESTUTIL.getConfiguration().setClass(HConstants.MASTER_IMPL,
+ TestingMaster.class, HMaster.class);
+ // Start up the cluster.
+ TESTUTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ if (!TESTUTIL.getHBaseCluster().getMaster().isInitialized()) {
+ // master is not initialized and is waiting something forever.
+ for (MasterThread mt : TESTUTIL.getHBaseCluster().getLiveMasterThreads()) {
+ mt.interrupt();
+ }
+ }
+ TESTUTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * An HMaster instance used in this test. If 'TestingMaster.sleep' is set in
+ * the Configuration, then we'll sleep after log is split and we'll also
+ * return a custom RegionServerTracker.
+ */
+ public static class TestingMaster extends HMaster {
+ private boolean logSplit = false;
+
+ public TestingMaster(Configuration conf) throws IOException,
+ KeeperException, InterruptedException {
+ super(conf);
+ }
+
+ @Override
+ public void splitLogAfterStartup(MasterFileSystem mfs, ServerManager sm) {
+ super.splitLogAfterStartup(mfs, sm);
+ logSplit = true;
+ // If "TestingMaster.sleep" is set, sleep after log split.
+ if (getConfiguration().getBoolean("TestingMaster.sleep", false)) {
+ int duration = getConfiguration().getInt(
+ "TestingMaster.sleep.duration", 0);
+ Threads.sleep(duration);
+ }
+ }
+
+
+ public boolean isLogSplitAfterStartup() {
+ return logSplit;
+ }
+ }
+
+ @Test(timeout = 120000)
+ public void testCorrectnessWhenMasterFailOver() throws Exception {
+ final byte[] TABLENAME = Bytes.toBytes("testCorrectnessWhenMasterFailOver");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[][] SPLITKEYS = { Bytes.toBytes("b"), Bytes.toBytes("i") };
+
+ MiniHBaseCluster cluster = TESTUTIL.getHBaseCluster();
+ while (cluster.getMaster().isInitializing()) {
+ Thread.sleep(100);
+ }
+
+ HTableDescriptor desc = new HTableDescriptor(TABLENAME);
+ desc.addFamily(new HColumnDescriptor(FAMILY));
+ HBaseAdmin hbaseAdmin = TESTUTIL.getHBaseAdmin();
+ hbaseAdmin.createTable(desc, SPLITKEYS);
+
+ assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
+
+ HTable table = new HTable(TESTUTIL.getConfiguration(), TABLENAME);
+ List puts = new ArrayList();
+ Put put1 = new Put(Bytes.toBytes("a"));
+ put1.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
+ Put put2 = new Put(Bytes.toBytes("h"));
+ put2.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
+ Put put3 = new Put(Bytes.toBytes("o"));
+ put3.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
+ puts.add(put1);
+ puts.add(put2);
+ puts.add(put3);
+ table.put(puts);
+ ResultScanner resultScanner = table.getScanner(new Scan());
+ int count = 0;
+ while (resultScanner.next() != null) {
+ count++;
+ }
+ resultScanner.close();
+ table.close();
+ assertEquals(3, count);
+
+ /* Starting test */
+ cluster.getConfiguration().setBoolean("TestingMaster.sleep", true);
+ cluster.getConfiguration().setInt("TestingMaster.sleep.duration", 10000);
+
+ /* NO.1 .META. region correctness */
+ // First abort master
+ abortMaster(cluster);
+ TestingMaster master = startMasterAndWaitUntilLogSplit(cluster);
+
+ // Second kill meta server
+ int metaServerNum = cluster.getServerWithMeta();
+ int rootServerNum = cluster.getServerWith(HRegionInfo.ROOT_REGIONINFO
+ .getRegionName());
+ HRegionServer metaRS = cluster.getRegionServer(metaServerNum);
+ LOG.debug("Killing metaRS and carryingRoot = "
+ + (metaServerNum == rootServerNum));
+ metaRS.kill();
+ metaRS.join();
+
+ /*
+ * Sleep double time of TestingMaster.sleep.duration, so we can ensure that
+ * master has already assigned ROOTandMETA or is blocking on assigning
+ * ROOTandMETA
+ */
+ Thread.sleep(10000 * 2);
+
+ waitUntilMasterIsInitialized(master);
+
+ // Third check whether data is correct in meta region
+ assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
+
+ /*
+ * NO.2 -ROOT- region correctness . If the .META. server killed in the NO.1
+ * is also carrying -ROOT- region, it is not needed
+ */
+ if (rootServerNum != metaServerNum) {
+ // First abort master
+ abortMaster(cluster);
+ master = startMasterAndWaitUntilLogSplit(cluster);
+
+ // Second kill root server
+ HRegionServer rootRS = cluster.getRegionServer(rootServerNum);
+ LOG.debug("Killing rootRS");
+ rootRS.kill();
+ rootRS.join();
+
+ /*
+ * Sleep double time of TestingMaster.sleep.duration, so we can ensure
+ * that master has already assigned ROOTandMETA or is blocking on
+ * assigning ROOTandMETA
+ */
+ Thread.sleep(10000 * 2);
+ waitUntilMasterIsInitialized(master);
+
+ // Third check whether data is correct in meta region
+ assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
+ }
+
+ /* NO.3 data region correctness */
+ ServerManager serverManager = cluster.getMaster().getServerManager();
+ while (serverManager.areDeadServersInProgress()) {
+ Thread.sleep(100);
+ }
+ table = new HTable(TESTUTIL.getConfiguration(), TABLENAME);
+ resultScanner = table.getScanner(new Scan());
+ count = 0;
+ while (resultScanner.next() != null) {
+ count++;
+ }
+ resultScanner.close();
+ table.close();
+ assertEquals(3, count);
+ }
+
+ private void abortMaster(MiniHBaseCluster cluster)
+ throws InterruptedException {
+ for (MasterThread mt : cluster.getLiveMasterThreads()) {
+ if (mt.getMaster().isActiveMaster()) {
+ mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
+ mt.join();
+ break;
+ }
+ }
+ LOG.debug("Master is aborted");
+ }
+
+ private TestingMaster startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
+ throws IOException, InterruptedException {
+ TestingMaster master = (TestingMaster) cluster.startMaster().getMaster();
+ while (!master.isLogSplitAfterStartup()) {
+ Thread.sleep(100);
+ }
+ LOG.debug("splitted:" + master.isLogSplitAfterStartup() + ",initialized:"
+ + master.isInitialized());
+ return master;
+ }
+
+ private void waitUntilMasterIsInitialized(HMaster master)
+ throws InterruptedException {
+ while (!master.isInitialized()) {
+ Thread.sleep(100);
+ }
+ LOG.debug("master isInitialized");
+ }
+
+}
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy)
@@ -24,6 +24,8 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -45,10 +47,10 @@
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
-import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
/**
* The ServerManager class manages info about region servers.
@@ -96,6 +98,13 @@
private final long maxSkew;
/**
+ * Set of region servers which are dead but not yet expired.
+ * If a server dies during master's safe mode, the server will be added to
+ * this set.
+ */
+ private Set deadNotExpiredServers = new HashSet();
+
+ /**
* Constructor.
* @param master
* @param services
@@ -347,6 +356,11 @@
* shutdown processing.
*/
public synchronized void expireServer(final ServerName serverName) {
+ if (services.isInitializing()) {
+ LOG.info("Master is initializing, delay expiring server " + serverName);
+ this.deadNotExpiredServers.add(serverName);
+ return;
+ }
excludeRegionServerFromSchemaChanges(serverName);
if (!this.onlineServers.containsKey(serverName)) {
LOG.warn("Received expiration of " + serverName +
@@ -393,6 +407,24 @@
carryingRoot + ", meta=" + carryingMeta);
}
+ /**
+ * Expire the servers that died during master's safe mode. It will be called at
+ * the end of HMaster#finishInitialization.
+ *
+ * @throws IOException
+ *
+ * */
+ synchronized void expireDelayedServers() throws IOException {
+ if (services.isInitializing()) {
+ throw new IOException("Master is initializing.");
+ }
+ Iterator serverIterator = deadNotExpiredServers.iterator();
+ while (serverIterator.hasNext()) {
+ expireServer(serverIterator.next());
+ serverIterator.remove();
+ }
+ }
+
/*
* Remove the server from the drain list.
*/
@@ -604,6 +636,13 @@
return new ArrayList(this.drainingServers);
}
+ /**
+ * @return A copy of the internal set of deadNotExpired servers.
+ */
+ public Set getDeadNotExpiredServers() {
+ return new HashSet(this.deadNotExpiredServers);
+ }
+
public boolean isServerOnline(ServerName serverName) {
return onlineServers.containsKey(serverName);
}
Index: src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy)
@@ -170,6 +170,8 @@
// 4. Trigger immediate assignment of the regions in round-robin fashion
List servers = serverManager.getOnlineServersList();
+ // Remove the deadNotExpired servers from the server list.
+ assignmentManager.removeDeadNotExpiredServers(servers);
try {
this.assignmentManager.assignUserRegions(Arrays.asList(newRegions),
servers);
Index: src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/MasterServices.java (revision 1294453)
+++ src/main/java/org/apache/hadoop/hbase/master/MasterServices.java (working copy)
@@ -24,8 +24,6 @@
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableDescriptors;
-import org.apache.hadoop.hbase.TableNotDisabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker;
@@ -92,4 +90,9 @@
*/
public RegionServerTracker getRegionServerTracker();
+ /**
+ * @return true if master is initializing.
+ */
+ public boolean isInitializing();
+
}
Index: src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (revision 1294453)
+++ src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (working copy)
@@ -35,7 +35,18 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.TableDescriptors;
+import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
@@ -260,6 +271,11 @@
public RegionServerTracker getRegionServerTracker() {
return null;
}
+
+ @Override
+ public boolean isInitializing() {
+ return false;
+ }
}
@Test