Index: src/main/java/org/apache/hadoop/hbase/HServerInfo.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/HServerInfo.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/HServerInfo.java (working copy) @@ -295,4 +295,19 @@ return inServerName; } + public static HServerInfo getServerInfo(String serverName) { + if (serverName != null && serverName.length() > 0) { + String[] split = serverName.split(SERVERNAME_SEPARATOR); + if (split.length == 3) { + String hostName = split[0]; + int port = Integer.parseInt(split[1]); + long startCode = Long.parseLong(split[2]); + String hostAndPort = hostName + ":" + port; + HServerAddress serverAddress = new HServerAddress(hostAndPort); + return new HServerInfo(serverAddress, startCode, port, hostName); + } + } + return null; + } + } Index: src/main/java/org/apache/hadoop/hbase/master/DeadServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/master/DeadServer.java (working copy) @@ -20,17 +20,19 @@ package org.apache.hadoop.hbase.master; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import org.apache.commons.lang.NotImplementedException; import org.apache.hadoop.hbase.HServerInfo; /** - * Class to hold dead servers list and utility querying dead server list. + * Class to hold dead servers list, utility querying dead server list and the + * dead servers being processed by the ServerShutdownHandler. */ public class DeadServer implements Set { /** @@ -42,6 +44,19 @@ */ private final Set deadServers = new HashSet(); + private enum ServerType { + NORMAL, // Normal dead server + ROOT, // The dead server carried -ROOT- + META // The dead server carried .META. + } + + /** + * Dead servers under processing by the ServerShutdownHander. Map of + * ServerType to set of being processed dead servers + */ + private final Map> deadServersUnderProcessing + = new HashMap>(); + /** Maximum number of dead servers to keep track of */ private final int maxDeadServers; @@ -52,6 +67,9 @@ super(); this.maxDeadServers = maxDeadServers; this.numProcessing = 0; + for (ServerType svrType : ServerType.values()) { + deadServersUnderProcessing.put(svrType, new HashSet()); + } } /** @@ -105,19 +123,61 @@ return numProcessing != 0; } + /** + * @return true if root server is being processed as dead. + */ + public boolean isDeadRootServerInProgress() { + return !deadServersUnderProcessing.get(ServerType.ROOT).isEmpty(); + } + + /** + * @return true if meta server is being processed as dead. + */ + public boolean isDeadMetaServerInProgress() { + return !deadServersUnderProcessing.get(ServerType.META).isEmpty(); + } + public synchronized Set clone() { Set clone = new HashSet(this.deadServers.size()); clone.addAll(this.deadServers); return clone; } + synchronized Set getDeadServersBeingProcessed() { + Set deadNormalServersBeingProcessed = this.deadServersUnderProcessing + .get(ServerType.NORMAL); + Set clone = new HashSet(); + clone.addAll(deadNormalServersBeingProcessed); + return clone; + } + public synchronized boolean add(String e) { this.numProcessing++; + deadServersUnderProcessing.get(ServerType.NORMAL).add(e); return deadServers.add(e); } + /** + * Add server to set of dead root servers if carryingRoot or set of dead meta + * servers if carryingMeta + * We don't need to increment numProcessing because add() has done that + */ + public synchronized void add(String server, boolean carryingRoot, + boolean carryingMeta) { + if (carryingRoot) { + deadServersUnderProcessing.get(ServerType.ROOT).add(server); + } + if (carryingMeta) { + deadServersUnderProcessing.get(ServerType.META).add(server); + } + } + public synchronized void finish(String e) { this.numProcessing--; + for(Entry> deadServerUnderProcessing: + deadServersUnderProcessing.entrySet()){ + deadServerUnderProcessing.getValue().remove(e); + } } public synchronized int size() { Index: src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -20,7 +20,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; -import java.util.Map; +import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -155,7 +155,7 @@ * @param onlineServers Map of online servers keyed by * {@link HServerInfo#getServerName()} */ - void splitLogAfterStartup(final Map onlineServers) { + void splitLogAfterStartup(final Set onlineServers) { Path logsDirPath = new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); try { if (!this.fs.exists(logsDirPath)) { @@ -176,7 +176,7 @@ } for (FileStatus status : logFolders) { String serverName = status.getPath().getName(); - if (onlineServers.get(serverName) == null) { + if (!onlineServers.contains(serverName)) { LOG.info("Log folder " + status.getPath() + " doesn't belong " + "to a known region server, splitting"); splitLog(serverName); @@ -333,4 +333,13 @@ new Path(rootdir, region.getTableDesc().getNameAsString()), region.getEncodedName(), familyName), true); } + + /** + * return true if server's log dir exists + */ + public boolean logDirExists(String serverName) throws IOException { + Path serverLogDir = new Path(this.rootdir, + HLog.getHLogDirectoryName(serverName.toString())); + return this.fs.exists(serverLogDir); + } } Index: src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (working copy) @@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; -import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.RetriesExhaustedException; @@ -227,6 +226,22 @@ } /** + * Method used by master on startup trying to figure state of cluster. Returns + * the current meta location unless its null. In this latter case, it has not + * yet been set so go check whats up in -ROOT- and return that. + * + * @return {@link ServerName} for server hosting .META. or if + * null, we'll read the location that is up in -ROOT- + * table (which could be null or just plain stale). + * @throws IOException + */ + public HServerAddress getMetaLocationOrReadLocationFromRoot() + throws IOException { + HServerAddress sn = getMetaLocation(); + return sn != null ? sn : MetaReader.getMetaRegionLocation(this); + } + + /** * Waits indefinitely for availability of -ROOT-. Used during * cluster startup. * @throws InterruptedException if interrupted while waiting Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (working copy) @@ -276,7 +276,22 @@ } /** + * Gets the location of .META. region by reading content of + * -ROOT-. + * + * @param ct + * @return location of .META. region as a {@link ServerName} or + * null if not found + * @throws IOException + */ + static HServerAddress getMetaRegionLocation(final CatalogTracker ct) + throws IOException { + return MetaReader.readMetaLocation(ct.waitForRootServerConnectionDefault()); + } + + /** * Reads the location of META from ROOT. + * * @param metaServer connection to server hosting ROOT * @return location of META in ROOT, null if not available * @throws IOException Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; -import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; /** * The ServerManager class manages info about region servers - HServerInfo, @@ -162,7 +161,7 @@ } } - private HServerInfo haveServerWithSameHostAndPortAlready(final String hostnamePort) { + HServerInfo haveServerWithSameHostAndPortAlready(final String hostnamePort) { synchronized (this.onlineServers) { for (Map.Entry e: this.onlineServers.entrySet()) { if (e.getValue().getHostnamePort().equals(hostnamePort)) { @@ -420,11 +419,22 @@ } } + /** + * @return Set of known dead servers. + */ public Set getDeadServers() { return this.deadservers.clone(); } /** + * @return Set of dead servers which are being processed by the + * ServerShutdownHander. + */ + public Set getDeadServersBeingProcessed() { + return this.deadservers.getDeadServersBeingProcessed(); + } + + /** * Checks if any dead servers are currently in progress. * @return true if any RS are being processed as dead, false if not */ @@ -433,6 +443,20 @@ } /** + * @return true if root server is being processed as dead. + */ + public boolean isDeadRootServerInProgress() { + return this.deadservers.isDeadRootServerInProgress(); + } + + /** + * @return true if meta server is being processed as dead. + */ + public boolean isDeadMetaServerInProgress() { + return this.deadservers.isDeadMetaServerInProgress(); + } + + /** * @param hsa * @return The HServerInfo whose HServerAddress is hsa or null * if nothing found. @@ -538,6 +562,7 @@ boolean carryingMeta = address != null && hsi.getServerAddress().equals(address); if (carryingRoot || carryingMeta) { + this.deadservers.add(serverName, carryingRoot, carryingMeta); this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master, this.services, this.deadservers, info, carryingRoot, carryingMeta)); } else { Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -26,8 +26,10 @@ import java.net.UnknownHostException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.logging.Log; @@ -41,6 +43,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.HServerLoad; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; @@ -52,7 +55,6 @@ import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; -import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.Result; @@ -376,10 +378,42 @@ // Wait for region servers to report in. Returns count of regions. int regionCount = this.serverManager.waitForRegionServers(); + + // Check zk for regionservers that are up but didn't register + for (String sn : this.regionServerTracker.getOnlineServerNames()) { + if (!this.serverManager.isServerOnline(sn)) { + HServerInfo serverInfo = HServerInfo.getServerInfo(sn); + if (serverInfo != null) { + HServerInfo existingServer = serverManager + .haveServerWithSameHostAndPortAlready(serverInfo + .getHostnamePort()); + if (existingServer == null) { + // Not registered; add it. + LOG.info("Registering server found up in zk but who has not yet " + + "reported in: " + sn); + // We set serverLoad with one region, it could differentiate with + // regionserver which is started just now + HServerLoad serverLoad = new HServerLoad(); + serverLoad.setNumberOfRegions(1); + serverInfo.setLoad(serverLoad); + this.serverManager.recordNewServer(serverInfo, true, null); + } + } else { + LOG.warn("Server " + sn + + " found up in zk, but is not a correct server name"); + } + } + } + Set knownServers = new HashSet(); + knownServers.addAll(serverManager.getOnlineServers().keySet()); + if (this.serverManager.areDeadServersInProgress()) { + // Dead servers are processing, their logs would be split by + // ServerShutdownHandler + knownServers.addAll(serverManager.getDeadServersBeingProcessed()); + } // TODO: Should do this in background rather than block master startup - this.fileSystemManager. - splitLogAfterStartup(this.serverManager.getOnlineServers()); + this.fileSystemManager.splitLogAfterStartup(knownServers); // Make sure root and meta assigned before proceeding. assignRootAndMeta(); @@ -395,7 +429,7 @@ this.assignmentManager.assignAllUserRegions(); } else { LOG.info("Master startup proceeding: master failover"); - this.assignmentManager.processFailover(); + this.assignmentManager.processFailover(knownServers); } // Fixing up missing daughters if any @@ -428,9 +462,23 @@ boolean rit = this.assignmentManager. processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.ROOT_REGIONINFO); if (!catalogTracker.verifyRootRegionLocation(timeout)) { - this.assignmentManager.assignRoot(); + HServerInfo rootServerInfo = this.serverManager + .getHServerInfo(catalogTracker.getRootLocation()); + if (rootServerInfo != null) { + HServerLoad rootServerLoad = rootServerInfo.getLoad(); + if (rootServerLoad != null && rootServerLoad.getNumberOfRegions() > 0) { + // If rootServer is online && not start just now, we expire it + this.serverManager.expireServer(rootServerInfo); + } + } + // RootServer is may being processed as dead server. Before assign root, + // we need to wait until its log is splitted. + waitUntilNoLogDir(rootServerInfo.getServerName()); + if (!this.serverManager.isDeadRootServerInProgress()) { + this.assignmentManager.assignRoot(); + } this.catalogTracker.waitForRoot(); - //This guarantees that the transition has completed + // This guarantees that the transition has completed this.assignmentManager.waitForAssignment(HRegionInfo.ROOT_REGIONINFO); assigned++; } @@ -444,7 +492,21 @@ rit = this.assignmentManager. processRegionInTransitionAndBlockUntilAssigned(HRegionInfo.FIRST_META_REGIONINFO); if (!this.catalogTracker.verifyMetaRegionLocation(timeout)) { - this.assignmentManager.assignMeta(); + HServerInfo metaServerInfo = this.serverManager + .getHServerInfo(catalogTracker.getMetaLocationOrReadLocationFromRoot()); + if (metaServerInfo != null) { + HServerLoad metaServerLoad = metaServerInfo.getLoad(); + if (metaServerLoad != null && metaServerLoad.getNumberOfRegions() > 0) { + // If metaServer is online && not start just now, we expire it + this.serverManager.expireServer(metaServerInfo); + } + } + // MetaServer is may being processed as dead server. Before assign meta, + // we need to wait until its log is splitted. + waitUntilNoLogDir(metaServerInfo.getServerName()); + if (!this.serverManager.isDeadMetaServerInProgress()) { + this.assignmentManager.assignMeta(); + } this.catalogTracker.waitForMeta(); // Above check waits for general meta availability but this does not // guarantee that the transition has completed @@ -496,6 +558,26 @@ } } + /** + * Wait until server's log dir doesn't exist or time out. + * + */ + private void waitUntilNoLogDir(final String serverName) throws IOException, + InterruptedException { + int waitTime = conf.getInt("hbase.master.assignMeta.timeout", 60000); + for (int i = 0; serverName != null; i++) { + if (!this.fileSystemManager.logDirExists(serverName)) { + break; + } + if (i >= waitTime / 1000) { + throw new RuntimeException( + "Timed out waiting to finish splitting log for " + serverName); + } + Thread.sleep(1000); + } + + } + /* * @return This masters' address. * @throws UnknownHostException Index: src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java (working copy) @@ -98,4 +98,14 @@ public List getOnlineServers() throws KeeperException { return ZKUtil.listChildrenAndGetAsAddresses(watcher, watcher.rsZNode); } + + /** + * Gets the online servers from zookeeper. + * + * @return list of online servers from zk + * @throws KeeperException + */ + public List getOnlineServerNames() throws KeeperException { + return ZKUtil.listChildrenNoWatch(watcher, watcher.rsZNode); + } } Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1233193) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -193,11 +193,13 @@ /** * Handle failover. Restore state from META and ZK. Handle any regions in * transition. Presumes .META. and -ROOT- deployed. + * @param onlineServers onlined servers when master starts * @throws KeeperException * @throws IOException * @throws InterruptedException */ - void processFailover() throws KeeperException, IOException, InterruptedException { + void processFailover(final Set onlineServers) + throws KeeperException, IOException, InterruptedException { // Concurrency note: In the below the accesses on regionsInTransition are // outside of a synchronization block where usually all accesses to RIT are // synchronized. The presumption is that in this case it is safe since this @@ -218,7 +220,7 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions Map>> deadServers = - rebuildUserRegions(); + rebuildUserRegions(onlineServers); // Process list of dead servers; note this will add regions to the RIT. // processRegionsInTransition will read them and assign them out. processDeadServers(deadServers); @@ -1592,12 +1594,15 @@ *

* Returns a map of servers that are not found to be online and the regions * they were hosting. + * @param onlineServers if one region's location belongs to onlineServers, it + * doesn't need to be assigned * @return map of servers not online to their assigned regions, as stored * in META * @throws IOException * @throws KeeperException */ - private Map>> rebuildUserRegions() + private Map>> rebuildUserRegions( + final Set onlineServers) throws IOException, KeeperException { // Region assignment from META List results = MetaReader.fullScanOfResults(catalogTracker); @@ -1630,7 +1635,7 @@ // need to enable the table if not disabled or disabling // this will be used in rolling restarts enableTableIfNotDisabledOrDisabling(disabled, disabling, tableName); - } else if (!serverManager.isServerOnline(regionLocation.getServerName())) { + } else if (!onlineServers.contains(regionLocation.getServerName())) { // Region is located on a server that isn't online List> offlineRegions = offlineServers.get(regionLocation.getServerName());