>> deadServers =
- rebuildUserRegions();
+ rebuildUserRegions(onlineServers);
// Process list of dead servers; note this will add regions to the RIT.
// processRegionsInTransition will read them and assign them out.
processDeadServers(deadServers);
@@ -1561,12 +1563,15 @@
*
* Returns a map of servers that are not found to be online and the regions
* they were hosting.
+ * @param onlineServers names of the servers to treat as online; a region
+ * located on one of them is not reported as belonging to an offline server
* @return map of servers not online to their assigned regions, as stored
* in META
* @throws IOException
* @throws KeeperException
*/
- private Map>> rebuildUserRegions()
+ private Map>> rebuildUserRegions(
+ final Set onlineServers)
throws IOException, KeeperException {
// Region assignment from META
List results = MetaReader.fullScanOfResults(catalogTracker);
@@ -1593,7 +1598,7 @@
if (checkIfRegionBelongsToDisabling(regionInfo)) {
disablingTables.add(disablingTableName);
}
- } else if (!serverManager.isServerOnline(regionLocation.getServerName())) {
+ } else if (!onlineServers.contains(regionLocation.getServerName())) {
// Region is located on a server that isn't online
List> offlineRegions =
offlineServers.get(regionLocation.getServerName());
Index: src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (revision 1231476)
+++ src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (working copy)
@@ -22,15 +22,14 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
import java.util.Set;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.hbase.HServerInfo;
/**
- * Class to hold dead servers list and utility querying dead server list.
+ * Holds the list of dead servers, utilities for querying that list, and the
+ * set of dead servers currently being processed by the ServerShutdownHandler.
*/
public class DeadServer implements Set {
/**
@@ -41,7 +40,11 @@
* because by then, its regions have probably been reassigned.
*/
private final Set deadServers = new HashSet();
-
+ /**
+ * Set of dead servers being processed by the ServerShutdownHandler.
+ */
+ private final Set deadServersUnderProcessing = new HashSet();
+
/** Maximum number of dead servers to keep track of */
private final int maxDeadServers;
@@ -111,13 +114,22 @@
return clone;
}
+ synchronized Set getDeadServersBeingProcessed() {
+ Set clone = new HashSet(
+ this.deadServersUnderProcessing.size());
+ clone.addAll(this.deadServersUnderProcessing);
+ return clone;
+ }
+
public synchronized boolean add(String e) {
this.numProcessing++;
+ deadServersUnderProcessing.add(e);
return deadServers.add(e);
}
public synchronized void finish(String e) {
this.numProcessing--;
+ deadServersUnderProcessing.remove(e);
}
public synchronized int size() {
Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1231476)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -25,8 +25,10 @@
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.logging.Log;
@@ -51,7 +53,6 @@
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.MetaReader;
-import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.Result;
@@ -374,11 +375,17 @@
// Wait for region servers to report in. Returns count of regions.
int regionCount = this.serverManager.waitForRegionServers();
+
+ Set knownServers = new HashSet();
+ knownServers.addAll(serverManager.getOnlineServers().keySet());
// TODO: Should do this in background rather than block master startup
- this.fileSystemManager.
- splitLogAfterStartup(this.serverManager.getOnlineServers());
-
+ this.fileSystemManager.splitLogAfterStartup(knownServers);
+
+ if (this.serverManager.areDeadServersInProgress()) {
+ // Some dead servers are still being processed; add them to knownServers too
+ knownServers.addAll(serverManager.getDeadServersBeingProcessed());
+ }
// Make sure root and meta assigned before proceeding.
assignRootAndMeta();
@@ -393,7 +400,7 @@
this.assignmentManager.assignAllUserRegions();
} else {
LOG.info("Master startup proceeding: master failover");
- this.assignmentManager.processFailover();
+ this.assignmentManager.processFailover(knownServers);
}
// Start balancer and meta catalog janitor after meta and regions have
Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1231476)
+++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy)
@@ -20,7 +20,7 @@
package org.apache.hadoop.hbase.master;
import java.io.IOException;
-import java.util.Map;
+import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
@@ -155,7 +155,7 @@
* @param onlineServers Map of online servers keyed by
* {@link HServerInfo#getServerName()}
*/
- void splitLogAfterStartup(final Map onlineServers) {
+ void splitLogAfterStartup(final Set onlineServers) {
Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
try {
if (!this.fs.exists(logsDirPath)) {
@@ -176,7 +176,7 @@
}
for (FileStatus status : logFolders) {
String serverName = status.getPath().getName();
- if (onlineServers.get(serverName) == null) {
+ if (!onlineServers.contains(serverName)) {
LOG.info("Log folder " + status.getPath() + " doesn't belong " +
"to a known region server, splitting");
splitLog(serverName);
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1231476)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy)
@@ -49,7 +49,6 @@
import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
-import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
/**
* The ServerManager class manages info about region servers - HServerInfo,
@@ -420,11 +419,22 @@
}
}
+ /**
+ * @return Set of known dead servers.
+ */
public Set getDeadServers() {
return this.deadservers.clone();
}
/**
+ * @return Set of dead servers which are being processed by the
+ * ServerShutdownHandler.
+ */
+ public Set getDeadServersBeingProcessed() {
+ return this.deadservers.getDeadServersBeingProcessed();
+ }
+
+ /**
* Checks if any dead servers are currently in progress.
* @return true if any RS are being processed as dead, false if not
*/