Index: src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java	(date 1299110957000)
+++ src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java	(revision )
@@ -46,13 +46,24 @@
     assertFalse(ds.areDeadServersInProgress());
     final String hostname12345 = "127.0.0.2,12345,4";
     ds.add(hostname12345);
-    // hostname123 should now be evicted
-    assertFalse(ds.isDeadServer(hostname123, false));
-    // but others should still be dead
     assertTrue(ds.isDeadServer(hostname1234, false));
     assertTrue(ds.isDeadServer(hostname12345, false));
     assertTrue(ds.areDeadServersInProgress());
     ds.finish(hostname12345);
     assertFalse(ds.areDeadServersInProgress());
+
+    // Already dead = 127.0.0.1,9090,112321
+    // Coming back alive = 127.0.0.1,9090,223341 (same host and port, new start code)
+
+    final String deadServer = "127.0.0.1,9090,112321";
+    assertFalse(ds.isDeadServerComingBackAlive(deadServer));
+    ds.add(deadServer);
+    assertTrue(ds.isDeadServer(deadServer));
+    final String deadServerHostComingAlive = "127.0.0.1,9090,223341";
+    assertTrue(ds.isDeadServerComingBackAlive(deadServerHostComingAlive));
+    assertTrue(ds.cleanPreviousInstance(deadServerHostComingAlive));
+    assertFalse(ds.isDeadServer(deadServer));
+    assertFalse(ds.isDeadServerComingBackAlive(deadServerHostComingAlive));
+
   }
 }
\ No newline at end of file
Index: src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/DeadServer.java	(date 1299110957000)
+++ src/main/java/org/apache/hadoop/hbase/master/DeadServer.java	(revision )
@@ -42,9 +42,6 @@
    */
   private final Set<String> deadServers = new HashSet<String>();
 
-  /** Linked list of dead servers used to bound size of dead server set */
-  private final List<String> deadServerList = new LinkedList<String>();
-
   /** Maximum number of dead servers to keep track of */
   private final int maxDeadServers;
 
@@ -68,6 +65,35 @@
   /**
    * @param serverName Servername as either host:port or
    * host,port,startcode.
+   * @return true if this server was dead before and coming back alive again
+   */
+  public synchronized boolean isDeadServerComingBackAlive(final String newServerName) {
+    return HServerInfo.isServerHostNameAndPortMatch(this.deadServers, newServerName);
+  }
+
+  /**
+   * Removes every dead server entry that has the same host and port as
+   * {@code newServerName}, whatever its start code. Use this when a server
+   * that was marked dead reports in again under a new start code.
+   * @param newServerName Servername of the server coming back, as
+   * host,port,startcode.
+   * @return true if at least one matching dead server entry was removed
+   */
+  public synchronized boolean cleanPreviousInstance(final String newServerName) {
+    final String hostAndPort = HServerInfo.getServerNameStrippedOfStartCode(newServerName);
+    boolean removed = false;
+    for (java.util.Iterator<String> i = this.deadServers.iterator(); i.hasNext();) {
+      if (HServerInfo.getServerNameStrippedOfStartCode(i.next()).equals(hostAndPort)) {
+        i.remove();
+        removed = true;
+      }
+    }
+    return removed;
+  }
+
+  /**
+   * @param serverName Servername as either host:port or
+   * host,port,startcode.
    * @param hostAndPortOnly True if serverName is host and
    * port only (host:port) and if so, then we do a prefix compare
    * (ignoring start codes) looking for dead server.
@@ -95,11 +121,6 @@
 
   public synchronized boolean add(String e) {
     this.numProcessing++;
-    // Check to see if we are at capacity for dead servers
-    if (deadServerList.size() == this.maxDeadServers) {
-      deadServers.remove(deadServerList.remove(0));
-    }
-    deadServerList.add(e);
     return deadServers.add(e);
   }
 
@@ -132,7 +153,7 @@
   }
 
   public synchronized boolean remove(Object o) {
-    throw new UnsupportedOperationException();
+    return (this.deadServers.remove(o));
   }
 
   public synchronized boolean containsAll(Collection<?> c) {
Index: src/main/java/org/apache/hadoop/hbase/HServerInfo.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/HServerInfo.java	(date 1299110957000)
+++ src/main/java/org/apache/hadoop/hbase/HServerInfo.java	(revision )
@@ -27,6 +27,7 @@
 import java.util.Set;
 
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Strings;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 
@@ -279,4 +280,38 @@
     }
     return false;
   }
+
+  /**
+   * @param inServerName Servername as host,port,startcode.
+   * @return {@code inServerName} truncated at its last separator, i.e. host,port with the
+   * startcode dropped; returned as is when null, empty or without a separator past its start.
+   */
+  public static String getServerNameStrippedOfStartCode(String inServerName) {
+    if (inServerName != null && inServerName.length() > 0) {
+      int index = inServerName.lastIndexOf(SERVERNAME_SEPARATOR);
+      if (index > 0) {
+        return inServerName.substring(0, index);
-}
+      }
+    }
+    return inServerName;
+  }
+
+  /**
+   * Check whether the new server name (excluding the startcode) is matching one of already dead
+   * servers (excluding the startcode).
+   * @param deadServers Servernames (host,port,startcode) of the servers already marked dead.
+   * @param newServerName Servername (host,port,startcode) of the server to look for.
+   * @return True if the new server host + port is matching one of already marked dead servers.
+   */
+  public static boolean isServerHostNameAndPortMatch(final Set<String> deadServers,
+                                                     final String newServerName) {
+
+    String newServerHostPortStrippedOfStartCode = getServerNameStrippedOfStartCode(newServerName);
+    for (String deadServerHostPortStartCode: deadServers) {
+      String deadServerHostPortStrippedOfStartCode = getServerNameStrippedOfStartCode(deadServerHostPortStartCode);
+      if (deadServerHostPortStrippedOfStartCode.equals(newServerHostPortStrippedOfStartCode)) return true;
+    }
+    return false;
+  }
+
+}
Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java	(date 1299110957000)
+++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java	(revision )
@@ -131,6 +131,20 @@
   }
 
   /**
+   * Check if this server which was previously marked as dead is coming back alive now. If yes,
+   * update the deadserver list so we have the correct status.
+   * @param serverName Servername (host,port,startcode) of the server reporting in.
+   */
+  private void checkDeadServerComingBackAlive(final String serverName) {
+    // The stale entry carries the old start code, so it has to be removed by host and port;
+    // removing serverName itself would never match anything in the dead server list.
+    if (this.deadservers.cleanPreviousInstance(serverName)) {
+      LOG.debug("Server " + serverName + " is come back up now; Removing " +
+        serverName + " from dead server list");
+    }
+  }
+
+  /**
    * Test to see if we have a server of same host and port already.
    * @param serverInfo
    * @throws PleaseHoldException
@@ -188,14 +202,19 @@
    * @param what START or REPORT
    * @throws LeaseStillHeldException
    */
   private void checkIsDead(final String serverName, final String what)
   throws YouAreDeadException {
-    if (!this.deadservers.isDeadServer(serverName)) return;
+    if (this.deadservers.isDeadServer(serverName)) {
+      // host name, port and start code all match with existing one of the dead servers. So,
+      // this server must be dead.
-    String message = "Server " + what + " rejected; currently processing " +
-      serverName + " as dead server";
-    LOG.debug(message);
-    throw new YouAreDeadException(message);
-  }
+      String message = "Server " + what + " rejected; currently processing " +
+        serverName + " as dead server";
+      LOG.debug(message);
+      throw new YouAreDeadException(message);
+    }
+    // Check whether it's a case of dead server coming back alive with a different start code.
+    checkDeadServerComingBackAlive(serverName);
+  }
 
   /**
    * Adds the HSI to the RS list