Allow a server that was previously marked dead to come back alive instead of
always being rejected with YouAreDeadException.

DeadServer.java
  * Drops the deadServerList linked list and the eviction it drove in add().
    After this patch add() no longer consults maxDeadServers.
  * Adds isDeadServerComingBackAlive(serverName), which delegates to
    HServerInfo.isServer(this, serverName, true).
  * remove(Object) no longer throws UnsupportedOperationException: it
    decrements numProcessing and returns the result of deadServers.remove(o).

ServerManager.java
  * Adds the private helper isDeadServerComingBackAlive(serverName). When
    DeadServer reports the server as coming back alive, the helper logs at
    debug level, removes the server from the dead-server set, returns true.
  * checkIsDead() throws YouAreDeadException only when that helper returns
    false and the server is still listed as dead.

Review notes
  * remove() is declared boolean; it must return a value or the patched file
    does not compile. It returns what the backing set's remove() returns.
  * NOTE(review): whether remove() should touch numProcessing at all is still
    an open question; the decrement is kept as submitted. Confirm against the
    code that decrements numProcessing elsewhere in DeadServer.
  * NOTE(review): with the eviction gone the dead-server set is no longer
    bounded by add(), yet the "at capacity" comment in add() is left behind as
    context. Confirm that unbounded growth is intended.
  * The blob ids on the "index" lines are kept from the submitted patch;
    regenerate the patch if they need to match the edited content.

diff --git a/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index efcbb99..d0002fb 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -42,9 +42,6 @@ public class DeadServer implements Set {
    */
   private final Set deadServers = new HashSet();
 
-  /** Linked list of dead servers used to bound size of dead server set */
-  private final List deadServerList = new LinkedList();
-
   /** Maximum number of dead servers to keep track of */
   private final int maxDeadServers;
 
@@ -64,6 +61,16 @@ public class DeadServer implements Set {
   public boolean isDeadServer(final String serverName) {
     return isDeadServer(serverName, false);
   }
+
+  /**
+   * @param serverName Servername as either host:port or
+   * host,port,startcode.
+   * @return true if this server was dead before and coming back alive again
+   */
+  public boolean isDeadServerComingBackAlive(final String serverName) {
+    return HServerInfo.isServer(this, serverName, true);
+  }
+
 
   /**
    * @param serverName Servername as either host:port or
@@ -96,10 +103,6 @@ public class DeadServer implements Set {
   public synchronized boolean add(String e) {
     this.numProcessing++;
     // Check to see if we are at capacity for dead servers
-    if (deadServerList.size() == this.maxDeadServers) {
-      deadServers.remove(deadServerList.remove(0));
-    }
-    deadServerList.add(e);
     return deadServers.add(e);
   }
 
@@ -132,7 +135,9 @@ public class DeadServer implements Set {
   }
 
   public synchronized boolean remove(Object o) {
-    throw new UnsupportedOperationException();
+    // TODO(review): confirm numProcessing should be decremented on remove.
+    this.numProcessing--;
+    return this.deadServers.remove(o);
   }
 
   public synchronized boolean containsAll(Collection c) {
diff --git a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 42c8bda..54ec0d9 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -131,6 +131,25 @@ public class ServerManager {
   }
 
   /**
+   * Check if this server which was previously marked as dead is coming back alive now. If yes,
+   * update the deadserver list so we have the correct status.
+   * @param serverName name of the server to check
+   * @return true if this server is coming back alive after death with a different startcode,
+   * or false otherwise.
+   */
+  private boolean isDeadServerComingBackAlive(final String serverName)
+  {
+    if (!this.deadservers.isDeadServerComingBackAlive(serverName)) return false;
+    // This server has now become alive after we marked it as dead. We now correct the
+    // deadserver list to reflect this server's current status.
+    String message = "Server " + serverName + " is come back up now; Removing " +
+      serverName + " from dead server list";
+    LOG.debug(message);
+    this.deadservers.remove(serverName);
+    return true;
+  }
+
+  /**
    * Test to see if we have a server of same host and port already.
    * @param serverInfo
    * @throws PleaseHoldException
@@ -190,11 +209,13 @@ public class ServerManager {
    */
   private void checkIsDead(final String serverName, final String what)
   throws YouAreDeadException {
-    if (!this.deadservers.isDeadServer(serverName)) return;
-    String message = "Server " + what + " rejected; currently processing " +
-      serverName + " as dead server";
-    LOG.debug(message);
-    throw new YouAreDeadException(message);
+    if (!isDeadServerComingBackAlive(serverName)) {
+      if (!this.deadservers.isDeadServer(serverName)) return;
+      String message = "Server " + what + " rejected; currently processing " +
+        serverName + " as dead server";
+      LOG.debug(message);
+      throw new YouAreDeadException(message);
+    }
   }
 
   /**