Index: src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java (revision 1040242)
+++ src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java (working copy)
@@ -305,14 +305,14 @@
String serverName) throws InterruptedException {
ServerManager sm = activeMaster.getMaster().getServerManager();
// First wait for it to be in dead list
- while (!sm.deadservers.isDeadServer(serverName)) {
+ while (!sm.getDeadServers().contains(serverName)) {
log("Waiting for [" + serverName + "] to be listed as dead in master");
Thread.sleep(1);
}
log("Server [" + serverName + "] marked as dead, waiting for it to " +
"finish dead processing");
- while (sm.deadservers.isDeadServer(serverName)) {
- log("Server [" + serverName + "] still marked as dead, waiting");
+ while (sm.areDeadServersInProgress()) { // true while any dead server is in progress, not only this one
+ log("Server [" + serverName + "] still being processed, waiting");
Thread.sleep(100);
}
log("Server [" + serverName + "] done with server shutdown processing");
Index: src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java (revision 1040242)
+++ src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java (working copy)
@@ -25,7 +25,7 @@
public class TestDeadServer {
@Test public void testIsDead() {
- DeadServer ds = new DeadServer();
+ DeadServer ds = new DeadServer(2); // track at most 2 dead servers so a third add forces an eviction
final String hostname123 = "127.0.0.1,123,3";
assertFalse(ds.isDeadServer(hostname123, false));
assertFalse(ds.isDeadServer(hostname123, true));
@@ -34,5 +34,25 @@
assertFalse(ds.isDeadServer("127.0.0.1:1", true));
assertFalse(ds.isDeadServer("127.0.0.1:1234", true));
assertTrue(ds.isDeadServer("127.0.0.1:123", true));
+ assertTrue(ds.areDeadServersInProgress()); // NOTE(review): presumes hostname123 was add()ed above this hunk
+ ds.finish(hostname123); // only lowers the in-progress count
+ assertFalse(ds.areDeadServersInProgress());
+ final String hostname1234 = "127.0.0.2,1234,4";
+ ds.add(hostname1234);
+ assertTrue(ds.isDeadServer(hostname123, false)); // finish() did not remove it from the dead set
+ assertTrue(ds.isDeadServer(hostname1234, false));
+ assertTrue(ds.areDeadServersInProgress());
+ ds.finish(hostname1234);
+ assertFalse(ds.areDeadServersInProgress());
+ final String hostname12345 = "127.0.0.2,12345,4";
+ ds.add(hostname12345);
+ // the set holds at most 2 entries, so this third add evicted the oldest one (hostname123)
+ assertFalse(ds.isDeadServer(hostname123, false));
+ // the two most recently added servers are still reported as dead
+ assertTrue(ds.isDeadServer(hostname1234, false));
+ assertTrue(ds.isDeadServer(hostname12345, false));
+ assertTrue(ds.areDeadServersInProgress());
+ ds.finish(hostname12345);
+ assertFalse(ds.areDeadServersInProgress());
}
}
\ No newline at end of file
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1040242)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy)
@@ -87,7 +87,7 @@
// Reporting to track master metrics.
private final MasterMetrics metrics;
- final DeadServer deadservers = new DeadServer();
+ private final DeadServer deadservers; // assigned in the constructor from the configured capacity
private final long maxSkew;
@@ -104,6 +104,8 @@
this.metrics = metrics;
Configuration c = master.getConfiguration();
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
+ this.deadservers =
+ new DeadServer(c.getInt("hbase.master.maxdeadservers", 100)); // cap on tracked dead servers, default 100
}
/**
@@ -400,6 +402,14 @@
}
/**
+ * Reports whether any dead region server is still being processed; delegates to the DeadServer tracker.
+ * @return true if any RS are being processed as dead, false if not
+ */
+ public boolean areDeadServersInProgress() {
+ return this.deadservers.areDeadServersInProgress();
+ }
+
+ /**
* @param hsa
* @return The HServerInfo whose HServerAddress is hsa or null
* if nothing found.
Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1040242)
+++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy)
@@ -663,7 +663,7 @@
abbreviate(this.assignmentManager.getRegionsInTransition().toString(), 256));
return false;
}
- if (!this.serverManager.getDeadServers().isEmpty()) {
+ if (this.serverManager.areDeadServersInProgress()) { // NOTE(review): branch only logs, no return false -- confirm
LOG.debug("Not running balancer because dead regionserver processing");
}
Map> assignments =
Index: src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (revision 1040242)
+++ src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (working copy)
@@ -22,6 +22,8 @@
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
import java.util.Set;
import org.apache.commons.lang.NotImplementedException;
@@ -40,7 +42,21 @@
*/
private final Set deadServers = new HashSet();
+ /** Dead servers in the order they were added; add() evicts from the head once the cap is reached */
+ private final List deadServerList = new LinkedList();
+ /** Maximum number of dead servers to keep track of */
+ private final int maxDeadServers;
+
+ /** Number of dead servers currently being processed; add() increments it, finish() decrements it */
+ private int numProcessing;
+
+ public DeadServer(int maxDeadServers) { // maxDeadServers: upper bound on tracked dead servers
+ super();
+ this.maxDeadServers = maxDeadServers;
+ this.numProcessing = 0; // nothing is being processed yet
+ }
+
/**
* @param serverName
* @return true if server is dead
@@ -61,12 +77,36 @@
return HServerInfo.isServer(this, serverName, hostAndPortOnly);
}
+ /**
+ * Checks if there are currently any dead servers being processed by the
+ * master. Synchronized like add/finish, which update the in-progress
+ * count, so that callers on other threads read a current value.
+ * @return true if any RS are being processed as dead
+ */
+ public synchronized boolean areDeadServersInProgress() {
+ return numProcessing != 0;
+ }
+
public synchronized Set clone() {
Set clone = new HashSet(this.deadServers.size());
clone.addAll(this.deadServers);
return clone;
}
+ /** Tracks e as dead and in progress; the oldest entries are evicted once the cap is reached. */
+ public synchronized boolean add(String e) {
+ this.numProcessing++;
+ while (!deadServerList.isEmpty() && deadServerList.size() >= this.maxDeadServers) {
+ deadServers.remove(deadServerList.remove(0)); // guarded: a cap <= 0 must not hit an empty list
+ }
+ deadServerList.add(e);
+ return deadServers.add(e);
+ }
+
+ public synchronized void finish(String e) { // e is not used; the server stays in the dead set
+ this.numProcessing--; // one fewer dead server in progress
+ }
+
public synchronized int size() {
return deadServers.size();
}
@@ -91,12 +131,8 @@
return deadServers.toArray(a);
}
- public synchronized boolean add(String e) {
- return deadServers.add(e);
- }
-
public synchronized boolean remove(Object o) {
- return deadServers.remove(o);
+ throw new UnsupportedOperationException("remove is not supported; call finish(String) when processing ends");
}
public synchronized boolean containsAll(Collection> c) {
Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1040242)
+++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy)
@@ -147,7 +147,7 @@
this.services.getAssignmentManager().assign(e.getKey(), true);
}
}
- this.deadServers.remove(serverName);
+ this.deadServers.finish(serverName); // server stays listed as dead; only the in-progress count drops
LOG.info("Finished processing of shutdown of " + serverName);
}