Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1292711)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -26,20 +26,20 @@
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -47,6 +47,7 @@
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerLoad;
import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MasterInSafeModeException;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
@@ -70,7 +71,6 @@
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.ProtocolSignature;
import org.apache.hadoop.hbase.ipc.RpcServer;
-import org.apache.hadoop.hbase.master.CatalogJanitor.SplitParentFirstComparator;
import org.apache.hadoop.hbase.master.handler.CreateTableHandler;
import org.apache.hadoop.hbase.master.handler.DeleteTableHandler;
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
@@ -407,8 +407,9 @@
this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
zooKeeper.registerListenerFirst(assignmentManager);
- this.regionServerTracker = new RegionServerTracker(zooKeeper, this,
- this.serverManager);
+ this.regionServerTracker =
+ createRegionServerTracker(this.zooKeeper, this, this.serverManager);
+
this.regionServerTracker.start();
this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this,
@@ -434,6 +435,19 @@
", cluster-up flag was=" + wasUp);
}
+ /**
+ * Override to change the master's RegionServerTracker creation. Used for testing.
+ *
+ * @param zkw zookeeper watcher the tracker registers itself with
+ * @param a abortable to notify of fatal errors
+ * @param sm server manager the tracker reports region server changes to
+ * @return Instance of RegionServerTracker
+ */
+ public RegionServerTracker createRegionServerTracker(final ZooKeeperWatcher zkw,
+ final Abortable a, final ServerManager sm) {
+ return new RegionServerTracker(zkw, a, sm);
+ }
+
// Check if we should stop every second.
private Sleeper stopSleeper = new Sleeper(1000, this);
private void loop() {
@@ -514,8 +528,7 @@
// TODO: Should do this in background rather than block master startup
status.setStatus("Splitting logs after master startup");
- this.fileSystemManager.
- splitLogAfterStartup(this.serverManager.getOnlineServers().keySet());
+ splitLogAfterStartup(this.fileSystemManager, this.serverManager);
// Make sure root and meta assigned before proceeding.
assignRootAndMeta(status);
@@ -559,9 +572,22 @@
LOG.error("Coprocessor postStartMaster() hook failed", ioe);
}
}
+
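+ // Now that initialization is complete, expire any region servers whose
+ // expiration was deferred because they died while the master was in safe mode.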
+ this.serverManager.expireDelayedServers();
}
/**
+ * Override to change the master's splitLogAfterStartup. Used for testing.
+ *
+ * @param mfs master file system that performs the log splitting
+ * @param sm server manager providing the set of online servers
+ */
+ public void splitLogAfterStartup(final MasterFileSystem mfs,
+ final ServerManager sm) {
+ mfs.splitLogAfterStartup(sm.getOnlineServers().keySet());
+ }
+
+ /**
* Check -ROOT- and .META. are assigned. If not,
* assign them.
* @throws InterruptedException
@@ -578,17 +604,10 @@
status.setStatus("Assigning ROOT region");
boolean rit = this.assignmentManager.
processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO);
- ServerName expiredServer = null;
if (!catalogTracker.verifyRootRegionLocation(timeout)) {
ServerName currentRootServer = this.catalogTracker.getRootLocation();
- if (expireIfOnline(currentRootServer)) {
- // We are expiring this server. The processing of expiration will assign
- // root so don't do it here.
- expiredServer = currentRootServer;
- } else {
- // Root was not on an online server when we failed verification
- this.assignmentManager.assignRoot();
- }
+ splitLogIfOnline(currentRootServer);
+ this.assignmentManager.assignRoot();
this.catalogTracker.waitForRoot();
//This guarantees that the transition has completed
this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO);
@@ -608,13 +627,8 @@
if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) {
ServerName currentMetaServer =
this.catalogTracker.getMetaLocationOrReadLocationFromRoot();
- if (currentMetaServer != null && currentMetaServer.equals(expiredServer)) {
- // We are expiring the server that is carrying meta already.
- // The expiration processing will take care of reassigning meta.
- expireIfOnline(currentMetaServer);
- } else {
- this.assignmentManager.assignMeta();
- }
+ splitLogIfOnline(currentMetaServer);
+ assignmentManager.assignMeta();
this.catalogTracker.waitForMeta();
// Above check waits for general meta availability but this does not
// guarantee that the transition has completed
@@ -665,16 +679,17 @@
}
/**
- * Expire a server if we find it is one of the online servers set.
+ * Split a server's log if we find it is one of the online servers set.
+ *
* @param sn ServerName to check.
- * @return True if server was online and so we expired it as unreachable.
+ * @throws IOException
*/
- private boolean expireIfOnline(final ServerName sn) {
- if (sn == null) return false;
- if (!this.serverManager.isServerOnline(sn)) return false;
- LOG.info("Forcing expiration of " + sn);
- this.serverManager.expireServer(sn);
- return true;
+ private void splitLogIfOnline(final ServerName sn) throws IOException {
+ if (sn == null || !this.serverManager.isServerOnline(sn)) {
+ return;
+ }
+ LOG.info("Forcing split log of " + sn);
+ this.fileSystemManager.splitLog(sn);
}
@Override
@@ -1168,6 +1183,9 @@
@Override
public void deleteTable(final byte [] tableName) throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
cpHost.preDeleteTable(tableName);
}
@@ -1230,6 +1248,9 @@
public void addColumn(byte [] tableName, HColumnDescriptor column)
throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
if (cpHost.preAddColumn(tableName, column)) {
return;
@@ -1244,6 +1265,9 @@
public void modifyColumn(byte [] tableName, HColumnDescriptor descriptor)
throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
if (cpHost.preModifyColumn(tableName, descriptor)) {
return;
@@ -1258,6 +1282,9 @@
public void deleteColumn(final byte [] tableName, final byte [] c)
throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
if (cpHost.preDeleteColumn(tableName, c)) {
return;
@@ -1271,6 +1298,9 @@
}
public void enableTable(final byte [] tableName) throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
cpHost.preEnableTable(tableName);
}
@@ -1283,6 +1313,9 @@
}
public void disableTable(final byte [] tableName) throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
cpHost.preDisableTable(tableName);
}
@@ -1332,6 +1365,9 @@
@Override
public void modifyTable(final byte[] tableName, HTableDescriptor htd)
throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
if (cpHost != null) {
cpHost.preModifyTable(tableName, htd);
}
@@ -1632,6 +1668,9 @@
return this.abort;
}
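+ /**
+ * @return true while the master has not yet finished initialization; admin
+ * operations are rejected with MasterInSafeModeException during this time.
+ */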
+ public boolean isSafeMode() {
+ return !this.initialized;
+ }
/**
* Report whether this master is currently the active master or not.
@@ -1667,6 +1706,9 @@
@Override
public void assign(final byte [] regionName)throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
Pair<HRegionInfo, ServerName> pair =
MetaReader.getRegion(this.catalogTracker, regionName);
if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
@@ -1690,6 +1732,9 @@
@Override
public void unassign(final byte [] regionName, final boolean force)
throws IOException {
+ if (isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
Pair<HRegionInfo, ServerName> pair =
MetaReader.getRegion(this.catalogTracker, regionName);
if (pair == null) throw new UnknownRegionException(Bytes.toString(regionName));
Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1292711)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy)
@@ -757,7 +757,7 @@
// Interrupt catalog tracker here in case any regions being opened out in
// handlers are stuck waiting on meta or root.
if (this.catalogTracker != null) this.catalogTracker.stop();
- if (this.fsOk) {
+ if (!this.killed && this.fsOk) {
waitOnAllRegionsToClose(abortRequested);
LOG.info("stopping server " + this.serverNameFromMasterPOV +
"; all regions closed.");
Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1292711)
+++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy)
@@ -64,7 +64,6 @@
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
-import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState.State;
import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
@@ -1767,6 +1766,8 @@
final String encodedName = state.getRegion().getEncodedName();
final List<ServerName> servers = this.serverManager.getOnlineServersList();
final List<ServerName> drainingServers = this.serverManager.getDrainingServersList();
+ final Set<ServerName> deadNotExpiredServers = this.serverManager
+ .getDeadNotExpiredServers();
if (serverToExclude != null) servers.remove(serverToExclude);
@@ -1780,6 +1781,16 @@
}
}
+ // Remove the dead-but-not-yet-expired servers from the candidate pool so we
+ // never plan an assignment onto a server that is already known to be dead.
+ if (!deadNotExpiredServers.isEmpty()) {
+ for (final ServerName server : deadNotExpiredServers) {
+ LOG.debug("Removing dead but not expired server: " + server
+ + " from eligible server pool.");
+ servers.remove(server);
+ }
+ }
+
if (servers.isEmpty()) return null;
RegionPlan randomPlan = new RegionPlan(state.getRegion(), null,
@@ -2170,7 +2181,19 @@
public void assignAllUserRegions() throws IOException, InterruptedException {
// Get all available servers
List<ServerName> servers = serverManager.getOnlineServersList();
+ Set<ServerName> deadNotExpiredServers = serverManager
+ .getDeadNotExpiredServers();
+ // Remove the dead-but-not-yet-expired servers from the candidate pool so we
+ // never plan an assignment onto a server that is already known to be dead.
+ if (!deadNotExpiredServers.isEmpty()) {
+ for (final ServerName server : deadNotExpiredServers) {
+ LOG.debug("Removing dead but not expired server: " + server
+ + " from eligible server pool.");
+ servers.remove(server);
+ }
+ }
+
// If there are no servers we need not proceed with region assignment.
if(servers.isEmpty()) return;
Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestRSKilledWhenMasterInitializing.java (revision 0)
@@ -0,0 +1,257 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
+import org.apache.hadoop.hbase.master.ServerManager;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.zookeeper.KeeperException;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestRSKilledWhenMasterInitializing {
+ private static final Log LOG = LogFactory.getLog(TestRSKilledWhenMasterInitializing.class);
+
+ private static final HBaseTestingUtility TESTUTIL = new HBaseTestingUtility();
+ private static final int NUM_MASTERS = 1;
+ private static final int NUM_RS = 4;
+
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ // Set it so that this test runs with my custom master
+ TESTUTIL.getConfiguration().setClass(HConstants.MASTER_IMPL,
+ TestingMaster.class, HMaster.class);
+ // Start up the cluster.
+ TESTUTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+ }
+
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ if (!TESTUTIL.getHBaseCluster().getMaster().isInitialized()) {
+ // Master is not initialized and is waiting on something forever; interrupt it so shutdown can proceed.
+ for (MasterThread mt : TESTUTIL.getHBaseCluster().getLiveMasterThreads()) {
+ mt.interrupt();
+ }
+ }
+ TESTUTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * An HMaster instance used in this test. If 'TestingMaster.sleep' is set in
+ * the Configuration, we sleep for the configured duration after the startup
+ * log splitting has finished.
+ */
+ public static class TestingMaster extends HMaster {
+ private boolean logSplit = false;
+
+ public TestingMaster(Configuration conf) throws IOException,
+ KeeperException, InterruptedException {
+ super(conf);
+ }
+
+ @Override
+ public void splitLogAfterStartup(MasterFileSystem mfs, ServerManager sm) {
+ super.splitLogAfterStartup(mfs, sm);
+ logSplit = true;
+ // If "TestingMaster.sleep" is set, sleep after log split.
+ if (getConfiguration().getBoolean("TestingMaster.sleep", false)) {
+ int duration = getConfiguration().getInt(
+ "TestingMaster.sleep.duration", 0);
+ Threads.sleep(duration);
+ }
+ }
+
+
+ public boolean isLogSplitAfterStartup() {
+ return logSplit;
+ }
+ }
+
+ @Test(timeout = 120000)
+ public void testCorrectnessWhenMasterFailOver() throws Exception {
+ final byte[] TABLENAME = Bytes.toBytes("testCorrectnessWhenMasterFailOver");
+ final byte[] FAMILY = Bytes.toBytes("family");
+ final byte[][] SPLITKEYS = { Bytes.toBytes("b"), Bytes.toBytes("i") };
+
+ MiniHBaseCluster cluster = TESTUTIL.getHBaseCluster();
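+ // Wait for the master to leave safe mode before creating the test table.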
+ while (cluster.getMaster().isSafeMode()) {
+ Thread.sleep(100);
+ }
+
+ HTableDescriptor desc = new HTableDescriptor(TABLENAME);
+ desc.addFamily(new HColumnDescriptor(FAMILY));
+ HBaseAdmin hbaseAdmin = TESTUTIL.getHBaseAdmin();
+ hbaseAdmin.createTable(desc, SPLITKEYS);
+
+ assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
+
+ HTable table = new HTable(TESTUTIL.getConfiguration(), TABLENAME);
+ List<Put> puts = new ArrayList<Put>();
+ Put put1 = new Put(Bytes.toBytes("a"));
+ put1.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
+ Put put2 = new Put(Bytes.toBytes("h"));
+ put2.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
+ Put put3 = new Put(Bytes.toBytes("o"));
+ put3.add(FAMILY, Bytes.toBytes("q1"), Bytes.toBytes("value"));
+ puts.add(put1);
+ puts.add(put2);
+ puts.add(put3);
+ table.put(puts);
+ ResultScanner resultScanner = table.getScanner(new Scan());
+ int count = 0;
+ while (resultScanner.next() != null) {
+ count++;
+ }
+ resultScanner.close();
+ table.close();
+ assertEquals(3, count);
+
+ /* Starting test */
+ cluster.getConfiguration().setBoolean("TestingMaster.sleep", true);
+ cluster.getConfiguration().setInt("TestingMaster.sleep.duration", 10000);
+
+ /* NO.1 .META. region correctness */
+ // First abort master
+ abortMaster(cluster);
+ TestingMaster master = startMasterAndWaitUntilLogSplit(cluster);
+
+ // Second kill meta server
+ int metaServerNum = cluster.getServerWithMeta();
+ int rootServerNum = cluster.getServerWith(HRegionInfo.ROOT_REGIONINFO
+ .getRegionName());
+ HRegionServer metaRS = cluster.getRegionServer(metaServerNum);
+ LOG.debug("Killing metaRS and carryingRoot = "
+ + (metaServerNum == rootServerNum));
+ metaRS.kill();
+ metaRS.join();
+
+ /*
+ * Sleep for double the TestingMaster.sleep.duration, so we can be sure the
+ * master has either already assigned -ROOT- and .META. or is blocked on
+ * assigning them.
+ */
+ Thread.sleep(10000 * 2);
+
+ waitUntilMasterIsInitialized(master);
+
+ // Third check whether data is correct in meta region
+ assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
+
+ /*
+ * NO.2 -ROOT- region correctness. If the .META. server killed in NO.1 was
+ * also carrying the -ROOT- region, this step is not needed.
+ */
+ if (rootServerNum != metaServerNum) {
+ // First abort master
+ abortMaster(cluster);
+ master = startMasterAndWaitUntilLogSplit(cluster);
+
+ // Second kill root server
+ HRegionServer rootRS = cluster.getRegionServer(rootServerNum);
+ LOG.debug("Killing rootRS");
+ rootRS.kill();
+ rootRS.join();
+
+ /*
+ * Sleep for double the TestingMaster.sleep.duration, so we can be sure the
+ * master has either already assigned -ROOT- and .META. or is blocked on
+ * assigning them.
+ */
+ Thread.sleep(10000 * 2);
+ waitUntilMasterIsInitialized(master);
+
+ // Third check whether data is correct in meta region
+ assertTrue(hbaseAdmin.isTableAvailable(TABLENAME));
+ }
+
+ /* NO.3 data region correctness */
+ ServerManager serverManager = cluster.getMaster().getServerManager();
+ while (serverManager.areDeadServersInProgress()) {
+ Thread.sleep(100);
+ }
+ table = new HTable(TESTUTIL.getConfiguration(), TABLENAME);
+ resultScanner = table.getScanner(new Scan());
+ count = 0;
+ while (resultScanner.next() != null) {
+ count++;
+ }
+ resultScanner.close();
+ table.close();
+ assertEquals(3, count);
+ }
+
+ private void abortMaster(MiniHBaseCluster cluster)
+ throws InterruptedException {
+ for (MasterThread mt : cluster.getLiveMasterThreads()) {
+ if (mt.getMaster().isActiveMaster()) {
+ mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
+ mt.join();
+ break;
+ }
+ }
+ LOG.debug("Master is aborted");
+ }
+
+ private TestingMaster startMasterAndWaitUntilLogSplit(MiniHBaseCluster cluster)
+ throws IOException, InterruptedException {
+ TestingMaster master = (TestingMaster) cluster.startMaster().getMaster();
+ while (!master.isLogSplitAfterStartup()) {
+ Thread.sleep(100);
+ }
+ LOG.debug("log split: " + master.isLogSplitAfterStartup() + ", initialized: "
+ + master.isInitialized());
+ return master;
+ }
+
+ private void waitUntilMasterIsInitialized(HMaster master)
+ throws InterruptedException {
+ while (!master.isInitialized()) {
+ Thread.sleep(100);
+ }
+ LOG.debug("master isInitialized");
+ }
+
+}
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1292711)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy)
@@ -24,6 +24,8 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -36,6 +38,7 @@
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerLoad;
+import org.apache.hadoop.hbase.MasterInSafeModeException;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
@@ -45,10 +48,11 @@
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
-import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
+import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
+import org.apache.mina.util.ConcurrentHashSet;
/**
* The ServerManager class manages info about region servers.
@@ -96,6 +100,13 @@
private final long maxSkew;
/**
+ * Set of region servers that are dead but whose expiration has been deferred.
+ * If a server dies while the master is in safe mode, the server is added to
+ * this set instead of being expired immediately.
+ */
+ private Set<ServerName> deadNotExpiredServers = new ConcurrentHashSet<ServerName>();
+
+ /**
* Constructor.
* @param master
* @param services
@@ -347,6 +358,11 @@
* shutdown processing.
*/
public synchronized void expireServer(final ServerName serverName) {
+ if (services.isSafeMode()) {
+ LOG.info("Master is in safe mode, delay expiring server " + serverName);
+ this.deadNotExpiredServers.add(serverName);
+ return;
+ }
excludeRegionServerFromSchemaChanges(serverName);
if (!this.onlineServers.containsKey(serverName)) {
LOG.warn("Received expiration of " + serverName +
@@ -393,6 +409,22 @@
carryingRoot + ", meta=" + carryingMeta);
}
+ /**
+ * Expire the servers that died while the master was in safe mode. Called at
+ * the end of HMaster#finishInitialization.
+ *
+ */
+ void expireDelayedServers() throws MasterInSafeModeException {
+ if (services.isSafeMode()) {
+ throw new MasterInSafeModeException();
+ }
+ Iterator<ServerName> serverIterator = deadNotExpiredServers.iterator();
+ while (serverIterator.hasNext()) {
+ expireServer(serverIterator.next());
+ serverIterator.remove();
+ }
+ }
+
/*
* Remove the server from the drain list.
*/
@@ -604,6 +636,13 @@
return new ArrayList<ServerName>(this.drainingServers);
}
+ /**
+ * @return A copy of the internal set of deadNotExpired servers.
+ */
+ public Set<ServerName> getDeadNotExpiredServers() {
+ return new HashSet<ServerName>(this.deadNotExpiredServers);
+ }
+
public boolean isServerOnline(ServerName serverName) {
return onlineServers.containsKey(serverName);
}
Index: src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/MasterServices.java (revision 1292711)
+++ src/main/java/org/apache/hadoop/hbase/master/MasterServices.java (working copy)
@@ -24,8 +24,6 @@
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableDescriptors;
-import org.apache.hadoop.hbase.TableNotDisabledException;
-import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.zookeeper.MasterSchemaChangeTracker;
@@ -92,4 +90,9 @@
*/
public RegionServerTracker getRegionServerTracker();
+ /**
+ * @return true if master is in safe mode.
+ */
+ public boolean isSafeMode();
+
}
Index: src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (revision 1292711)
+++ src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java (working copy)
@@ -35,7 +35,18 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.*;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.TableDescriptors;
+import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
@@ -260,6 +271,11 @@
public RegionServerTracker getRegionServerTracker() {
return null;
}
+
+ @Override
+ public boolean isSafeMode() {
+ return false;
+ }
}
@Test
Index: src/main/java/org/apache/hadoop/hbase/MasterInSafeModeException.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/MasterInSafeModeException.java (revision 0)
+++ src/main/java/org/apache/hadoop/hbase/MasterInSafeModeException.java (revision 0)
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+
+/**
+ * Thrown when an admin operation is invoked while the master is still initializing (i.e. in safe mode).
+ */
+public class MasterInSafeModeException extends IOException {
+ private static final long serialVersionUID = -5828790543381661660L;
+
+ /** default constructor */
+ public MasterInSafeModeException() {
+ super();
+ }
+
+ /**
+ * Constructor
+ *
+ * @param s message
+ */
+ public MasterInSafeModeException(String s) {
+ super(s);
+ }
+
+ /**
+ * Constructor taking another exception.
+ *
+ * @param e Exception to grab data from.
+ */
+ public MasterInSafeModeException(Exception e) {
+ super(e);
+ }
+
+}