Index: src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/catalog/CatalogTracker.java (working copy) @@ -29,19 +29,16 @@ import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerAddress; -import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.zookeeper.MetaNodeTracker; import org.apache.hadoop.hbase.zookeeper.RootRegionTracker; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.hadoop.ipc.RemoteException; -import org.apache.zookeeper.KeeperException; /** * Tracks the availability of the catalog tables -ROOT- and @@ -63,6 +60,12 @@ private final RootRegionTracker rootRegionTracker; private final MetaNodeTracker metaNodeTracker; private final AtomicBoolean metaAvailable = new AtomicBoolean(false); + /** + * Do not clear this address once set. Let it be cleared by + * {@link #setMetaLocation(HServerAddress)} only. Its needed when we do + * server shutdown processing -- we need to know who had .META. last. If you + * want to know if the address is good, rely on {@link #metaAvailable} value. + */ private HServerAddress metaLocation; private final int defaultTimeout; private boolean stopped = false; @@ -365,7 +368,6 @@ private void resetMetaLocation() { LOG.info("Current cached META location is not valid, resetting"); this.metaAvailable.set(false); - this.metaLocation = null; } private void setMetaLocation(HServerAddress metaLocation) { @@ -471,37 +473,6 @@ return getMetaServerConnection(true) != null; } - /** - * Check if hsi was carrying -ROOT- or - * .META. and if so, clear out old locations. - * @param hsi Server that has crashed/shutdown. - * @throws InterruptedException - * @throws KeeperException - * @return Pair of booleans; if this server was carrying root, then first - * boolean is set, if server was carrying meta, then second boolean set. - */ - public Pair processServerShutdown(final HServerInfo hsi) - throws InterruptedException, KeeperException { - Pair result = new Pair(false, false); - HServerAddress rootHsa = getRootLocation(); - if (rootHsa == null) { - LOG.info("-ROOT- is not assigned; continuing"); - } else if (hsi.getServerAddress().equals(rootHsa)) { - result.setFirst(true); - LOG.info(hsi.getServerName() + " carrying -ROOT-; unsetting"); - } - HServerAddress metaHsa = getMetaLocation(); - if (metaHsa == null) { - LOG.info(".META. is not assigned; continuing"); - } else if (hsi.getServerAddress().equals(metaHsa)) { - LOG.info(hsi.getServerName() + " carrying .META.; unsetting " + - ".META. location"); - result.setSecond(true); - resetMetaLocation(); - } - return result; - } - MetaNodeTracker getMetaNodeTracker() { return this.metaNodeTracker; } Index: src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/catalog/MetaReader.java (working copy) @@ -246,9 +246,12 @@ throw e; } } catch (RemoteException re) { - if (re.unwrapRemoteException() instanceof NotServingRegionException) { + IOException ioe = re.unwrapRemoteException(); + if (ioe instanceof NotServingRegionException) { // Treat this NSRE as unavailable table. Catch and fall through to // return null below + } else if (ioe.getMessage().contains("Server not running")) { + // Treat as unavailable table. } else { throw re; } Index: src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (working copy) @@ -127,7 +127,8 @@ M_ZK_REGION_OFFLINE (50), // Master adds this region as offline in ZK // Master controlled events to be executed on the master - M_SERVER_SHUTDOWN (70); // Master is processing shutdown of a RS + M_SERVER_SHUTDOWN (70), // Master is processing shutdown of a RS + M_META_SERVER_SHUTDOWN (72); // Master is processing shutdown of RS hosting a meta region (-ROOT- or .META.). /** * Constructor Index: src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (working copy) @@ -27,7 +27,6 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -77,6 +76,7 @@ MASTER_SERVER_OPERATIONS (3), MASTER_TABLE_OPERATIONS (4), MASTER_RS_SHUTDOWN (5), + MASTER_META_SERVER_OPERATIONS (6), // RegionServer executor services RS_OPEN_REGION (20), @@ -115,6 +115,9 @@ case M_SERVER_SHUTDOWN: return ExecutorType.MASTER_SERVER_OPERATIONS; + case M_META_SERVER_SHUTDOWN: + return ExecutorType.MASTER_META_SERVER_OPERATIONS; + case C_M_DELETE_TABLE: case C_M_DISABLE_TABLE: case C_M_ENABLE_TABLE: Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.catalog.CatalogTracker; @@ -96,13 +97,13 @@ private TimeoutMonitor timeoutMonitor; /** Regions currently in transition. */ - private final ConcurrentSkipListMap regionsInTransition = + final ConcurrentSkipListMap regionsInTransition = new ConcurrentSkipListMap(); /** Plans for region movement. Key is the encoded version of a region name*/ // TODO: When do plans get cleaned out? Ever? In server open and in server // shutdown processing -- St.Ack - protected final ConcurrentNavigableMap regionPlans = + final ConcurrentNavigableMap regionPlans = new ConcurrentSkipListMap(); /** Set of tables that have been disabled. */ @@ -315,7 +316,7 @@ if (!serverManager.isServerOnline(data.getServerName()) && !this.master.getServerName().equals(data.getServerName())) { LOG.warn("Attempted to handle region transition for server but " + - "server is not online: " + data); + "server is not online: " + data.getRegionName()); return; } String encodedName = HRegionInfo.encodeRegionName(data.getRegionName()); @@ -597,9 +598,8 @@ ZKAssign.deleteOfflineNode(watcher, regionInfo.getEncodedName()); } } catch (KeeperException.NoNodeException nne) { - LOG.warn("Tried to delete closed node for " + regionInfo + " but it " + - "does not exist"); - return; + LOG.debug("Tried to delete closed node for " + regionInfo + " but it " + + "does not exist so just offlining"); } catch (KeeperException e) { this.master.abort("Error deleting CLOSED node in ZK", e); } @@ -976,15 +976,29 @@ } // Send CLOSE RPC try { - serverManager.sendRegionClose(regions.get(region), state.getRegion()); + if(!serverManager.sendRegionClose(regions.get(region), + state.getRegion())) { + throw new NotServingRegionException("Server failed to close region"); + } + } catch (NotServingRegionException nsre) { + // Did not CLOSE, so set region offline and assign it + LOG.debug("Attempted to send CLOSE for region " + + region.getRegionNameAsString() + " but failed, setting region as " + + "OFFLINE and reassigning"); + synchronized (regionsInTransition) { + forceRegionStateToOffline(region); + assign(region); + } } catch (IOException e) { // For now call abort if unexpected exception -- radical, but will get fellas attention. // St.Ack 20101012 + // I don't think IOE can happen anymore, only NSRE IOE is used here + // should be able to remove this at least. jgray 20101024 this.master.abort("Remote unexpected exception", e); } catch (Throwable t) { // For now call abort if unexpected exception -- radical, but will get fellas attention. // St.Ack 20101012 - this.master.abort("Unexpected exception", t); + this.master.abort("Remote unexpected exception", t); } } @@ -1435,14 +1449,40 @@ assign(regionState.getRegion()); break; case PENDING_OPEN: + LOG.info("Region has been PENDING_OPEN for too " + + "long, reassigning region=" + + regionInfo.getRegionNameAsString()); + // Should have a ZK node in OFFLINE state or no node at all + try { + if (ZKUtil.watchAndCheckExists(watcher, + ZKAssign.getNodeName(watcher, + regionInfo.getEncodedName())) && + !ZKAssign.verifyRegionState(watcher, regionInfo, + EventType.M_ZK_REGION_OFFLINE)) { + LOG.info("Region exists and not in expected OFFLINE " + + "state so skipping timeout, region=" + + regionInfo.getRegionNameAsString()); + break; + } + } catch (KeeperException ke) { + LOG.error("Unexpected ZK exception timing out " + + "PENDING_CLOSE region", + ke); + break; + } + AssignmentManager.this.setOffline(regionState.getRegion()); + regionState.update(RegionState.State.OFFLINE); + assign(regionState.getRegion()); + break; case OPENING: - LOG.info("Region has been PENDING_OPEN or OPENING for too " + + LOG.info("Region has been OPENING for too " + "long, reassigning region=" + regionInfo.getRegionNameAsString()); - // There could be two cases. No ZK node or ZK in CLOSING. + // Should have a ZK node in OPENING state try { - if (ZKUtil.checkExists(watcher, watcher.assignmentZNode) - != -1 && + if (ZKUtil.watchAndCheckExists(watcher, + ZKAssign.getNodeName(watcher, + regionInfo.getEncodedName())) && ZKAssign.transitionNode(watcher, regionInfo, HMaster.MASTER, EventType.RS_ZK_REGION_OPENING, EventType.M_ZK_REGION_OFFLINE, -1) == -1) { @@ -1465,8 +1505,27 @@ "not happen; region=" + regionInfo.getRegionNameAsString()); break; case PENDING_CLOSE: + LOG.info("Region has been PENDING_CLOSE for too " + + "long, running forced unassign again on region=" + + regionInfo.getRegionNameAsString()); + try { + // If the server got the RPC, it will transition the node + // to CLOSING, so only do something here if no node exists + if (!ZKUtil.watchAndCheckExists(watcher, + ZKAssign.getNodeName(watcher, + regionInfo.getEncodedName()))) { + unassign(regionInfo, true); + } + } catch (NoNodeException e) { + LOG.debug("Node no longer existed so not forcing another " + + "unassignment"); + } catch (KeeperException e) { + LOG.warn("Unexpected ZK exception timing out a region " + + "close", e); + } + break; case CLOSING: - LOG.info("Region has been PENDING_CLOSE or CLOSING for too " + + LOG.info("Region has been CLOSING for too " + "long, running forced unassign again on region=" + regionInfo.getRegionNameAsString()); try { @@ -1500,6 +1559,7 @@ Map.Entry e = i.next(); if (e.getValue().getDestination().equals(hsi)) { // Use iterator's remove else we'll get CME + LOG.info("REMOVING PLAN " + e.getValue()); i.remove(); } } Index: src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -413,7 +413,8 @@ this.catalogTracker.waitForRoot(); assigned++; } - LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit); + LOG.info("-ROOT- assigned=" + assigned + ", rit=" + rit + + ", location=" + catalogTracker.getRootLocation()); // Work on meta region rit = this.assignmentManager. @@ -426,7 +427,8 @@ this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO); assigned++; } - LOG.info(".META. assigned=" + assigned + ", rit=" + rit); + LOG.info(".META. assigned=" + assigned + ", rit=" + rit + + ", location=" + catalogTracker.getMetaLocation()); return assigned; } @@ -502,6 +504,8 @@ conf.getInt("hbase.master.executor.closeregion.threads", 5)); this.executorService.startExecutorService(ExecutorType.MASTER_SERVER_OPERATIONS, conf.getInt("hbase.master.executor.serverops.threads", 3)); + this.executorService.startExecutorService(ExecutorType.MASTER_META_SERVER_OPERATIONS, + conf.getInt("hbase.master.executor.serverops.threads", 2)); this.executorService.startExecutorService(ExecutorType.MASTER_TABLE_OPERATIONS, conf.getInt("hbase.master.executor.tableops.threads", 3)); Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -41,15 +41,19 @@ import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.YouAreDeadException; +import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.ipc.HRegionInterface; +import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.util.StringUtils; +import org.apache.zookeeper.KeeperException; /** * The ServerManager class manages info about region servers - HServerInfo, @@ -490,10 +494,36 @@ } return; } - this.services.getExecutorService().submit(new ServerShutdownHandler(this.master, + CatalogTracker ct = this.master.getCatalogTracker(); + // Was this server carrying root? + boolean carryingRoot; + try { + HServerAddress address = ct.getRootLocation(); + carryingRoot = address != null && + hsi.getServerAddress().equals(address); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOG.info("Interrupted"); + return; + } + // Was this server carrying meta? Can't ask CatalogTracker because it + // may have reset the meta location as null already (it may have already + // run into fact that meta is dead). I can ask assignment manager. It + // has an inmemory list of who has what. This list will be cleared as we + // process the dead server but should be find asking it now. + HServerAddress address = ct.getMetaLocation(); + boolean carryingMeta = + address != null && hsi.getServerAddress().equals(address); + if (carryingRoot || carryingMeta) { + this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master, + this.services, this.deadservers, info, carryingRoot, carryingMeta)); + } else { + this.services.getExecutorService().submit(new ServerShutdownHandler(this.master, this.services, this.deadservers, info)); + } LOG.debug("Added=" + serverName + - " to dead servers, submitted shutdown handler to be executed"); + " to dead servers, submitted shutdown handler to be executed, root=" + + carryingRoot + ", meta=" + carryingMeta); } // RPC methods to region servers @@ -546,16 +576,17 @@ * @return true if server acknowledged close, false if not * @throws IOException */ - public void sendRegionClose(HServerInfo server, HRegionInfo region) + public boolean sendRegionClose(HServerInfo server, HRegionInfo region) throws IOException { + if (server == null) return false; HRegionInterface hri = getServerConnection(server); if(hri == null) { LOG.warn("Attempting to send CLOSE RPC to server " + server.getServerName() + " failed because no RPC connection found " + "to this server"); - return; + return false; } - hri.closeRegion(region); + return hri.closeRegion(region); } /** Index: src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java (revision 0) @@ -0,0 +1,53 @@ +/** + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import org.apache.hadoop.hbase.HServerInfo; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.master.DeadServer; +import org.apache.hadoop.hbase.master.MasterServices; + +/** + * Shutdown handler for the server hosting -ROOT-, + * .META., or both. + */ +public class MetaServerShutdownHandler extends ServerShutdownHandler { + private final boolean carryingRoot; + private final boolean carryingMeta; + + public MetaServerShutdownHandler(final Server server, + final MasterServices services, + final DeadServer deadServers, final HServerInfo hsi, + final boolean carryingRoot, final boolean carryingMeta) { + super(server, services, deadServers, hsi, EventType.M_META_SERVER_SHUTDOWN); + this.carryingRoot = carryingRoot; + this.carryingMeta = carryingMeta; + } + + @Override + boolean isCarryingRoot() { + return this.carryingRoot; + } + + @Override + boolean isCarryingMeta() { + return this.carryingMeta; + } +} \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (working copy) @@ -57,7 +57,12 @@ public ServerShutdownHandler(final Server server, final MasterServices services, final DeadServer deadServers, final HServerInfo hsi) { - super(server, EventType.M_SERVER_SHUTDOWN); + this(server, services, deadServers, hsi, EventType.M_SERVER_SHUTDOWN); + } + + ServerShutdownHandler(final Server server, final MasterServices services, + final DeadServer deadServers, final HServerInfo hsi, EventType type) { + super(server, type); this.hsi = hsi; this.server = server; this.services = services; @@ -67,19 +72,22 @@ } } + /** + * @return True if the server we are processing was carrying -ROOT- + */ + boolean isCarryingRoot() { + return false; + } + + /** + * @return True if the server we are processing was carrying .META. + */ + boolean isCarryingMeta() { + return false; + } + @Override public void process() throws IOException { - Pair carryingCatalog = null; - try { - carryingCatalog = - this.server.getCatalogTracker().processServerShutdown(this.hsi); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("Interrupted", e); - } catch (KeeperException e) { - this.server.abort("In server shutdown processing", e); - throw new IOException("Aborting", e); - } final String serverName = this.hsi.getServerName(); LOG.info("Splitting logs for " + serverName); @@ -92,7 +100,7 @@ this.services.getAssignmentManager().processServerShutdown(this.hsi); // Assign root and meta if we were carrying them. - if (carryingCatalog.getFirst()) { // -ROOT- + if (isCarryingRoot()) { // -ROOT- try { this.services.getAssignmentManager().assignRoot(); } catch (KeeperException e) { @@ -100,10 +108,10 @@ throw new IOException("Aborting", e); } } - if (carryingCatalog.getSecond()) { // .META. - this.services.getAssignmentManager().assignMeta(); - } + // Carrying meta? + if (isCarryingMeta()) this.services.getAssignmentManager().assignMeta(); + // Wait on meta to come online; we need it to progress. try { this.server.getCatalogTracker().waitForMeta(); Index: src/main/java/org/apache/hadoop/hbase/zookeeper/MetaNodeTracker.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/MetaNodeTracker.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/MetaNodeTracker.java (working copy) @@ -57,6 +57,7 @@ @Override public void nodeDeleted(String path) { + super.nodeDeleted(path); if (!path.equals(node)) return; LOG.info("Detected completed assignment of META, notifying catalog tracker"); try { Index: src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (revision 1027351) +++ src/main/java/org/apache/hadoop/hbase/zookeeper/ZKAssign.java (working copy) @@ -97,7 +97,7 @@ * @param regionName region name * @return full path node name */ - private static String getNodeName(ZooKeeperWatcher zkw, String regionName) { + public static String getNodeName(ZooKeeperWatcher zkw, String regionName) { return ZKUtil.joinZNode(zkw.assignmentZNode, regionName); } @@ -762,4 +762,44 @@ Thread.sleep(200); } } + + /** + * Verifies that the specified region is in the specified state in ZooKeeper. + *

+ * Returns true if region is in transition and in the specified state in + * ZooKeeper. Returns false if the region does not exist in ZK or is in + * a different state. + *

+ * Method synchronizes() with ZK so will yield an up-to-date result but is + * a slow read. + * @param watcher + * @param region + * @param expectedState + * @return true if region exists and is in expected state + */ + public static boolean verifyRegionState(ZooKeeperWatcher zkw, + HRegionInfo region, EventType expectedState) + throws KeeperException { + String encoded = region.getEncodedName(); + + String node = getNodeName(zkw, encoded); + zkw.sync(node); + + // Read existing data of the node + byte [] existingBytes = null; + try { + existingBytes = ZKUtil.getDataAndWatch(zkw, node); + } catch (KeeperException.NoNodeException nne) { + return false; + } catch (KeeperException e) { + throw e; + } + if (existingBytes == null) return false; + RegionTransitionData existingData = + RegionTransitionData.fromBytes(existingBytes); + if (existingData.getEventType() == expectedState){ + return true; + } + return false; + } } Index: src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (revision 1027351) +++ src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java (working copy) @@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HServerAddress; import org.apache.hadoop.hbase.HServerInfo; import org.apache.hadoop.hbase.KeyValue; @@ -105,6 +104,26 @@ } /** + * Test that we get notification if .META. moves. + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test public void testThatIfMETAMovesWeAreNotified() + throws IOException, InterruptedException, KeeperException { + HConnection connection = Mockito.mock(HConnection.class); + final CatalogTracker ct = constructAndStartCatalogTracker(connection); + try { + RootLocationEditor.setRootLocation(this.watcher, + new HServerAddress("example.com:1234")); + } finally { + // Clean out root location or later tests will be confused... they presume + // start fresh in zk. + RootLocationEditor.deleteRootLocation(this.watcher); + } + } + + /** * Test interruptable while blocking wait on root and meta. * @throws IOException * @throws InterruptedException Index: src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (revision 1027351) +++ src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java (working copy) @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionState; import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; @@ -794,10 +795,35 @@ cluster.waitForActiveAndReadyMaster(); log("Master is ready"); + // Let's add some weird states to master in-memory state + + // PENDING_OPEN and enabled + region = enabledRegions.remove(0); + regionsThatShouldBeOnline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_OPEN)); + // PENDING_OPEN and disabled + region = disabledRegions.remove(0); + regionsThatShouldBeOffline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_OPEN)); + // PENDING_CLOSE and enabled + region = enabledRegions.remove(0); + regionsThatShouldBeOnline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_CLOSE)); + // PENDING_CLOSE and disabled + region = disabledRegions.remove(0); + regionsThatShouldBeOffline.add(region); + master.assignmentManager.regionsInTransition.put(region.getEncodedName(), + new RegionState(region, RegionState.State.PENDING_CLOSE)); + // Failover should be completed, now wait for no RIT log("Waiting for no more RIT"); ZKAssign.blockUntilNoRIT(zkw); - log("No more RIT in ZK, now doing final test verification"); + log("No more RIT in ZK"); + master.assignmentManager.waitUntilNoRegionsInTransition(120000); + log("No more RIT in RIT map, doing final test verification"); // Grab all the regions that are online across RSs Set onlineRegions = new TreeSet();