Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (revision 1027291) +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (working copy) @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; @@ -794,10 +795,35 @@ cluster.waitForActiveAndReadyMaster(); log("Master is ready"); + // Let's add some weird states to master in-memory state + + // PENDING_OPEN and enabled + region = enabledRegions.remove(0); + regionsThatShouldBeOnline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_OPEN)); + // PENDING_OPEN and disabled + region = disabledRegions.remove(0); + regionsThatShouldBeOffline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_OPEN)); + // PENDING_CLOSE and enabled + region = enabledRegions.remove(0); + regionsThatShouldBeOnline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_CLOSE)); + // PENDING_CLOSE and disabled + region = disabledRegions.remove(0); + regionsThatShouldBeOffline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_CLOSE)); + // Failover should be completed, now wait for no RIT log("Waiting for no more RIT"); ZKAssign.blockUntilNoRIT(zkw); - log("No more RIT in ZK, now doing final test verification"); + log("No more RIT in ZK"); + master.assignmentManager.waitUntilNoRegionsInTransition(120000); + log("No more RIT in RIT map, doing final test verification"); // Grab all the regions that are online across RSs Set onlineRegions = new TreeSet(); Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (working copy) @@ -97,7 +97,7 @@ * @param regionName region name * @return full path node name */ - private static String getNodeName(ZooKeeperWatcher zkw, String regionName) { + public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); } @@ -762,4 +762,44 @@ Thread.sleep(200); } } + + /** + * Verifies that the specified region is in the specified state in ZooKeeper. + *

+ * Returns true if region is in transition and in the specified state in + * ZooKeeper. Returns false if the region does not exist in ZK or is in + * a different state. + *

+ * Method synchronizes() with ZK so will yield an up-to-date result but is + * a slow read. + * @param watcher + * @param region + * @param expectedState + * @return true if region exists and is in expected state + */ + public static boolean verifyRegionState(ZooKeeperWatcher zkw, + HRegionInfo region, EventType expectedState) + throws KeeperException { + String encoded = region.getEncodedName(); + + String node = getNodeName(zkw, encoded); + zkw.sync(node); + + // Read existing data of the node + byte [] existingBytes = null; + try { + existingBytes = ZKUtil.getDataAndWatch(zkw, node); + } catch (KeeperException.NoNodeException nne) { + return false; + } catch (KeeperException e) { + throw e; + } + if (existingBytes == null) return false; + RegionTransitionData existingData = + RegionTransitionData.fromBytes(existingBytes); + if (existingData.getEventType() == expectedState){ + return true; + } + return false; + } } Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -45,11 +45,15 @@ import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; +import org.apache.hadoop.hbase.master.handler.RootServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.util.StringUtils; +import org.apache.zookeeper.KeeperException; /** * The ServerManager class manages info about region servers - HServerInfo, @@ -490,8 +494,31 @@ } return; } - this.services.getExecutorService().submit(new ServerShutdownHandler(this.master, + // Was this server carrying -ROOT- or .META.? If so, it gets special + // handling. + Pair carryingCatalog = null; + try { + carryingCatalog = + this.master.getCatalogTracker().processServerShutdown(hsi); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + // Set interrupt on thread and return without processing expired server. + return; + } catch (KeeperException e) { + // Set abort and just return + this.master.abort("In expireServer processing", e); + return; + } + if (carryingCatalog.getFirst()) { + this.services.getExecutorService().submit(new RootServerShutdownHandler(this.master, this.services, this.deadservers, info)); + } else if (carryingCatalog.getSecond()) { + this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master, + this.services, this.deadservers, info)); + } else { + this.services.getExecutorService().submit(new ServerShutdownHandler(this.master, + this.services, this.deadservers, info)); + } LOG.debug("Added=" + serverName + " to dead servers, submitted shutdown handler to be executed"); } @@ -546,16 +573,17 @@ * @return true if server acknowledged close, false if not * @throws IOException */ - public void sendRegionClose(HServerInfo server, HRegionInfo region) + public boolean sendRegionClose(HServerInfo server, HRegionInfo region) throws IOException { + if (server == null) return false; HRegionInterface hri = getServerConnection(server); if(hri == null) { LOG.warn("Attempting to send CLOSE RPC to server " + server.getServerName() + " failed because no RPC connection found " + "to this server"); - return; + return false; } - hri.closeRegion(region); + return hri.closeRegion(region); } /** Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.catalog.CatalogTracker; @@ -96,13 +97,13 @@ private TimeoutMonitor timeoutMonitor; /** Regions currently in transition. */ - private final ConcurrentSkipListMap regionsInTransition = + final ConcurrentSkipListMap regionsInTransition = new ConcurrentSkipListMap(); /** Plans for region movement. Key is the encoded version of a region name*/ // TODO: When do plans get cleaned out? Ever? In server open and in server // shutdown processing -- St.Ack - protected final ConcurrentNavigableMap regionPlans = + final ConcurrentNavigableMap regionPlans = new ConcurrentSkipListMap(); /** Set of tables that have been disabled. */ @@ -315,7 +316,7 @@ if (!serverManager.isServerOnline(data.getServerName()) && !this.master.getServerName().equals(data.getServerName())) { LOG.warn("Attempted to handle region transition for server but " + - "server is not online: " + data); + "server is not online: " + data.getRegionName()); return; } String encodedName = HRegionInfo.encodeRegionName(data.getRegionName()); @@ -597,9 +598,8 @@ ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName()); } } catch (KeeperException.NoNodeException nne) { - LOG.warn("Tried to delete closed node for " + regionInfo + " but it " + - "does not exist"); - return; + LOG.debug("Tried to delete closed node for " + regionInfo + " but it " + + "does not exist so just offlining"); } catch (KeeperException e) { this.master.abort("Error deleting CLOSED node in ZK", e); } @@ -976,15 +976,29 @@ } // Send CLOSE RPC try { - serverManager.sendRegionClose(regions.get(region), state.getRegion()); + if(!serverManager.sendRegionClose(regions.get(region), + state.getRegion())) { + throw new NotServingRegionException("Server failed to close region"); + } + } catch (NotServingRegionException nsre) { + // Did not CLOSE, so set region offline and assign it + LOG.debug("Attempted to send CLOSE for region " + + region.getRegionNameAsString() + " but failed, setting region as " + + "OFFLINE and reassigning"); + synchronized (regionsInTransition) { + forceRegionStateToOffline(region); + assign(region); + } } catch (IOException e) { // For now call abort if unexpected exception -- radical, but will get fellas attention. // St.Ack 20101012 + // I don't think IOE can happen anymore, only NSRE IOE is used here + // should be able to remove this at least. jgray 20101024 this.master.abort("Remote unexpected exception", e); } catch (Throwable t) { // For now call abort if unexpected exception -- radical, but will get fellas attention. // St.Ack 20101012 - this.master.abort("Unexpected exception", t); + this.master.abort("Remote unexpected exception", t); } } @@ -1435,14 +1449,40 @@ assign(regionState.getRegion()); break; case PENDING_OPEN: + LOG.info("Region has been PENDING_OPEN for too " + + "long, reassigning region=" + + regionInfo.getRegionNameAsString()); + // Should have a ZK node in OFFLINE state or no node at all + try { + if (ZKUtil.watchAndCheckExists(watcher, + ZKAssign.getNodeName(watcher, + regionInfo.getEncodedName())) && + !ZKAssign.verifyRegionState(watcher, regionInfo, + EventType.M_ZK_REGION_OFFLINE)) { + LOG.info("Region exists and not in expected OFFLINE " + + "state so skipping timeout, region=" + + regionInfo.getRegionNameAsString()); + break; + } + } catch (KeeperException ke) { + LOG.error("Unexpected ZK exception timing out " + + "PENDING_CLOSE region", + ke); + break; + } + AssignmentManager.this.setOffline(regionState.getRegion()); + regionState.update(RegionState.State.OFFLINE); + assign(regionState.getRegion()); + break; case OPENING: - LOG.info("Region has been PENDING_OPEN or OPENING for too " + + LOG.info("Region has been OPENING for too " + "long, reassigning region=" + regionInfo.getRegionNameAsString()); - // There could be two cases. No ZK node or ZK in CLOSING. + // Should have a ZK node in OPENING state try { - if (ZKUtil.checkExists(watcher, watcher.assignmentZNode) - != -1 && + if (ZKUtil.watchAndCheckExists(watcher, + ZKAssign.getNodeName(watcher, + regionInfo.getEncodedName())) && ZKAssign.transitionNode(watcher, regionInfo, HMaster.MASTER, EventType.RS_ZK_REGION_OPENING, EventType.M_ZK_REGION_OFFLINE, -1) == -1) { @@ -1465,8 +1505,27 @@ "not happen; region=" + regionInfo.getRegionNameAsString()); break; case PENDING_CLOSE: + LOG.info("Region has been PENDING_CLOSE for too " + + "long, running forced unassign again on region=" + + regionInfo.getRegionNameAsString()); + try { + // If the server got the RPC, it will transition the node + // to CLOSING, so only do something here if no node exists + if (!ZKUtil.watchAndCheckExists(watcher, + ZKAssign.getNodeName(watcher, + regionInfo.getEncodedName()))) { + unassign(regionInfo, true); + } + } catch (NoNodeException e) { + LOG.debug("Node no longer existed so not forcing another " + + "unassignment"); + } catch (KeeperException e) { + LOG.warn("Unexpected ZK exception timing out a region " + + "close", e); + } + break; case CLOSING: - LOG.info("Region has been PENDING_CLOSE or CLOSING for too " + + LOG.info("Region has been CLOSING for too " + "long, running forced unassign again on region=" + regionInfo.getRegionNameAsString()); try { @@ -1500,6 +1559,7 @@ Map.Entry e = i.next(); if (e.getValue().getDestination().equals(hsi)) { // Use iterator's remove else we'll get CME + LOG.info("REMOVING PLAN " + e.getValue()); i.remove(); } } Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -413,7 +413,8 @@ this.catalogTracker.waitForRoot(); assigned++; } - LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit); + LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit + + ", location=" + catalogTracker.getRootLocation()); // Work on meta region rit = this.assignmentManager. @@ -426,7 +427,8 @@ this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO); assigned++; } - LOG.info(".META. assigned=" + assigned + ", rit=" + rit); + LOG.info(".META. assigned=" + assigned + ", rit=" + rit + + ", location=" + catalogTracker.getMetaLocation()); return assigned; } @@ -502,6 +504,10 @@ conf.getInt("hbase.master.executor.closeregion.threads", 5)); this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, conf.getInt("hbase.master.executor.serverops.threads", 3)); + this.executorService.startExecutorService(ExecutorType.MASTER_ROOT_SERVER_OPERATIONS, + conf.getInt("hbase.master.executor.serverops.threads", 1)); + this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, + conf.getInt("hbase.master.executor.serverops.threads", 1)); this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, conf.getInt("hbase.master.executor.tableops.threads", 3)); Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1027292) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -57,7 +57,12 @@ public ServerShutdownHandler(final Server server, final MasterServices services, final DeadServer deadServers, final HServerInfo hsi) { - super(server, EventType.M_SERVER_SHUTDOWN); + this(server, services, deadServers, hsi, EventType.M_SERVER_SHUTDOWN); + } + + ServerShutdownHandler(final Server server, final MasterServices services, + final DeadServer deadServers, final HServerInfo hsi, EventType type) { + super(server, type); this.hsi = hsi; this.server = server; this.services = services; @@ -69,17 +74,6 @@ @Override public void process() throws IOException { - Pair carryingCatalog = null; - try { - carryingCatalog = - this.server.getCatalogTracker().processServerShutdown(this.hsi); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("Interrupted", e); - } catch (KeeperException e) { - this.server.abort("In server shutdown processing", e); - throw new IOException("Aborting", e); - } final String serverName = this.hsi.getServerName(); LOG.info("Splitting logs for " + serverName); @@ -92,7 +86,7 @@ this.services.getAssignmentManager().processServerShutdown(this.hsi); // Assign root and meta if we were carrying them. - if (carryingCatalog.getFirst()) { // -ROOT- + if (getEventType().equals(EventType.M_ROOT_SERVER_SHUTDOWN)) { // -ROOT- try { this.services.getAssignmentManager().assignRoot(); } catch (KeeperException e) { @@ -100,7 +94,7 @@ throw new IOException("Aborting", e); } } - if (carryingCatalog.getSecond()) { // .META. + if (getEventType().equals(EventType.M_META_SERVER_SHUTDOWN)) { // .META. this.services.getAssignmentManager().assignMeta(); } Index: src/main/java/org/apache/hadoop/hbase/master/handler/RootServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/RootServerShutdownHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/handler/RootServerShutdownHandler.java (revision 0) @@ -0,0 +1,36 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.master.DeadServer; +import org.apache.hadoop.hbase.master.MasterServices; + +/** + * Shutdown handler for the server hosting -ROOT- region. + */ +public class RootServerShutdownHandler extends ServerShutdownHandler { + public RootServerShutdownHandler(final Server server, + final MasterServices services, + final DeadServer deadServers, final HServerInfo hsi) { + super(server, services, deadServers, hsi, EventType.M_ROOT_SERVER_SHUTDOWN); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java (revision 0) @@ -0,0 +1,36 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.master.DeadServer; +import org.apache.hadoop.hbase.master.MasterServices; + +/** + * Shutdown handler for the server hosting .META. region. + */ +public class MetaServerShutdownHandler extends ServerShutdownHandler { + public MetaServerShutdownHandler(final Server server, + final MasterServices services, + final DeadServer deadServers, final HServerInfo hsi) { + super(server, services, deadServers, hsi, EventType.M_META_SERVER_SHUTDOWN); + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (working copy) @@ -127,7 +127,9 @@ M_ZK_REGION_OFFLINE (50), // Master adds this region as offline in ZK // Master controlled events to be executed on the master - M_SERVER_SHUTDOWN (70); // Master is processing shutdown of a RS + M_SERVER_SHUTDOWN (70), // Master is processing shutdown of a RS + M_ROOT_SERVER_SHUTDOWN (71), // Master is processing shutdown of RS hosting -ROOT- + M_META_SERVER_SHUTDOWN (72); // Master is processing shutdown of RS hosting .META. /** * Constructor Index: src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (working copy) @@ -27,7 +27,6 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -77,6 +76,8 @@ MASTER_SERVER_OPERATIONS (3), MASTER_TABLE_OPERATIONS (4), MASTER_RS_SHUTDOWN (5), + MASTER_ROOT_SERVER_OPERATIONS (6), + MASTER_META_SERVER_OPERATIONS (7), // RegionServer executor services RS_OPEN_REGION (20), @@ -115,6 +116,12 @@ case M_SERVER_SHUTDOWN: return ExecutorType.MASTER_SERVER_OPERATIONS; + case M_ROOT_SERVER_SHUTDOWN: + return ExecutorType.MASTER_ROOT_SERVER_OPERATIONS; + + case M_META_SERVER_SHUTDOWN: + return ExecutorType.MASTER_META_SERVER_OPERATIONS; + case C_M_DELETE_TABLE: case C_M_DISABLE_TABLE: case C_M_ENABLE_TABLE: Index: src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (working copy) @@ -494,8 +494,7 @@ if (metaHsa == null) { LOG.info(".META. is not assigned; continuing"); } else if (hsi.getServerAddress().equals(metaHsa)) { - LOG.info(hsi.getServerName() + " carrying .META.; unsetting " + - ".META. location"); + LOG.info(hsi.getServerName() + " carrying .META.; unsetting"); result.setSecond(true); resetMetaLocation(); } Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (revision 1027291) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (working copy) @@ -246,9 +246,12 @@ throw e; } } catch (RemoteException re) { - if (re.unwrapRemoteException() instanceof NotServingRegionException) { + IOException ioe = re.unwrapRemoteException(); + if (ioe instanceof NotServingRegionException) { // Treat this NSRE as unavailable table. Catch and fall through to // return null below + } else if (ioe.getMessage().contains("Server not running")) { + // Treat as unavailable table. } else { throw re; }