Patch summary (ServerManager.java, against revision 931032):
  * deadServers becomes a Set view over a ConcurrentHashMap instead of a
    synchronizedSet over a HashSet.
  * regionServerStartup now rejects a start message when the host:port is
    already in serverAddressToServerInfo, or when it prefix-matches an entry
    in deadServers; it no longer clears stale load entries or queues a
    ProcessServerShutdown for a previously registered server.
  * isDead gains a (serverName, hostAndPortOnly) overload that does the
    prefix scan over deadServers.
NOTE(review): the prefix scan uses String.startsWith, so "host:6002" would
also match an entry beginning "host:60020". The Javadoc describes full names
as host_port_startcode while the caller passes host:port; confirm the
separators agree, otherwise the prefix never matches.
NOTE(review): line breaks, indentation, blank lines and generic type arguments
were reconstructed from a whitespace-collapsed copy; verify against the target
revision before applying.

Index: src/java/org/apache/hadoop/hbase/master/ServerManager.java
===================================================================
--- src/java/org/apache/hadoop/hbase/master/ServerManager.java	(revision 931032)
+++ src/java/org/apache/hadoop/hbase/master/ServerManager.java	(working copy)
@@ -83,9 +83,11 @@
    * expires, but the server is still running. After the network is healed,
    * and it's server logs are recovered, it will be told to call server startup
    * because by then, its regions have probably been reassigned.
+   *
+   * newSetFromMap is a means of making a Set out of a Map.
    */
   protected final Set<String> deadServers =
-    Collections.synchronizedSet(new HashSet<String>());
+    Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
 
   /** SortedMap server load -> Set of server names */
   final SortedMap<HServerLoad, Set<String>> loadToServers =
@@ -150,52 +152,29 @@
   /**
    * Let the server manager know a new regionserver has come online
    * @param serverInfo
-   * @throws Leases.LeaseStillHeldException
+   * @throws IOException
    */
   public void regionServerStartup(final HServerInfo serverInfo)
-  throws Leases.LeaseStillHeldException {
+  throws IOException {
+    // Test for case where we get a region startup message from a regionserver
+    // that has been quickly restarted but whose znode expiration handler has
+    // not yet run, or from a server whose failure we are currently processing.
     HServerInfo info = new HServerInfo(serverInfo);
-    String serverName = info.getServerName();
-    if (serversToServerInfo.containsKey(serverName) ||
-        deadServers.contains(serverName)) {
-      LOG.debug("Server start was rejected: " + serverInfo);
-      LOG.debug("serversToServerInfo.containsKey: " + serversToServerInfo.containsKey(serverName));
-      LOG.debug("deadServers.contains: " + deadServers.contains(serverName));
-      throw new Leases.LeaseStillHeldException(serverName);
+    String hostAndPort = info.getServerAddress().toString();
+    if (this.serverAddressToServerInfo.containsKey(hostAndPort)) {
+      LOG.debug("Server start rejected; we already have a " + hostAndPort +
+        " registered");
+      throw new Leases.LeaseStillHeldException(hostAndPort);
     }
-
-    LOG.info("Received start message from: " + serverName);
-    // Go on to process the regionserver registration.
-    HServerLoad load = serversToLoad.remove(serverName);
-    if (load != null) {
-      // The startup message was from a known server.
-      // Remove stale information about the server's load.
-      synchronized (loadToServers) {
-        Set<String> servers = loadToServers.get(load);
-        if (servers != null) {
-          servers.remove(serverName);
-          if (servers.size() > 0)
-            loadToServers.put(load, servers);
-          else
-            loadToServers.remove(load);
-        }
-      }
+    if (isDead(hostAndPort, true)) {
+      LOG.debug("Server start rejected; currently processing " + hostAndPort +
+        " failure");
+      throw new Leases.LeaseStillHeldException(hostAndPort);
     }
-    HServerInfo storedInfo = serversToServerInfo.remove(serverName);
-    if (storedInfo != null && !master.closed.get()) {
-      // The startup message was from a known server with the same name.
-      // Timeout the old one right away.
-      master.getRootRegionLocation();
-      try {
-        master.toDoQueue.put(new ProcessServerShutdown(master, storedInfo));
-      } catch (InterruptedException e) {
-        LOG.error("Insertion into toDoQueue was interrupted", e);
-      }
-    }
+    LOG.info("Received start message from: " + info.getServerName());
     recordNewServer(info);
   }
 
-
   /**
    * Adds the HSI to the RS list and creates an empty load
    * @param info The region server informations
@@ -872,10 +851,25 @@
    * @param serverName
    * @return true if server is dead
    */
-  public boolean isDead(String serverName) {
-    return deadServers.contains(serverName);
+  public boolean isDead(final String serverName) {
+    return isDead(serverName, false);
   }
 
+  /**
+   * @param serverName Servername as either host:port or host_port_startcode.
+   * @param hostAndPortOnly True if serverName is host and
+   * port only (host:port) and if so, then we do a prefix compare (ignoring
+   * start codes) looking for dead server.
+   * @return true if server is dead
+   */
+  boolean isDead(final String serverName, final boolean hostAndPortOnly) {
+    if (!hostAndPortOnly) return deadServers.contains(serverName);
+    for (String hostPortStartCode: this.deadServers) {
+      if (hostPortStartCode.startsWith(serverName)) return true;
+    }
+    return false;
+  }
+
   public boolean canAssignUserRegions() {
     if (minimumServerCount == 0) {
       return true;