Index: src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (revision 1241786) +++ src/main/java/org/apache/hadoop/hbase/executor/EventHandler.java (working copy) @@ -107,6 +107,7 @@ RS_ZK_REGION_CLOSED (2), // RS has finished closing a region RS_ZK_REGION_OPENING (3), // RS is in process of opening a region RS_ZK_REGION_OPENED (4), // RS has finished opening a region + RS_ZK_REGION_FAILED_OPEN (5), // RS failed to open a region // Messages originating from Master to RS M_RS_OPEN_REGION (20), // Master asking RS to open a region Index: src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (revision 1241786) +++ src/main/java/org/apache/hadoop/hbase/executor/ExecutorService.java (working copy) @@ -107,6 +107,7 @@ // Master executor services case RS_ZK_REGION_CLOSED: + case RS_ZK_REGION_FAILED_OPEN: return ExecutorType.MASTER_CLOSE_REGION; case RS_ZK_REGION_OPENED: Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1241786) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -24,6 +24,7 @@ import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -138,6 +139,10 @@ new TreeMap(); private final ExecutorService executorService; + + private List ignoreStatesRSOffline = Arrays + .asList(new EventType[] { EventType.RS_ZK_REGION_FAILED_OPEN, + EventType.RS_ZK_REGION_CLOSED }); /** * Constructs a new assignment manager. @@ -346,6 +351,7 @@ break; case RS_ZK_REGION_CLOSED: + case RS_ZK_REGION_FAILED_OPEN: // Region is closed, insert into RIT and handle it addToRITandCallClose(regionInfo, RegionState.State.CLOSED, data); break; @@ -473,7 +479,8 @@ } // Verify this is a known server if (!serverManager.isServerOnline(data.getServerName()) && - !this.master.getServerName().equals(data.getServerName())) { + !this.master.getServerName().equals(data.getServerName()) + && !ignoreStatesRSOffline.contains(data.getEventType())) { LOG.warn("Attempted to handle region transition for server but " + "server is not online: " + Bytes.toString(data.getRegionName())); return; @@ -541,6 +548,20 @@ // Transition to OPENING (or update stamp if already OPENING) regionState.update(RegionState.State.OPENING, data.getStamp()); break; + case RS_ZK_REGION_FAILED_OPEN: + if (regionState == null + || (!regionState.isPendingOpen() && !regionState.isOpening())) { + LOG.warn("Received FAILED_OPEN for region " + prettyPrintedRegionName + + " from server " + data.getServerName() + " but region was in " + + " the state " + regionState + + " and not in PENDING_OPEN or OPENING"); + return; + } + // Handle this the same as if it were opened and then closed. + regionState.update(RegionState.State.CLOSED, data.getStamp()); + this.executorService.submit(new ClosedRegionHandler(master, this, + regionState.getRegion())); + break; case RS_ZK_REGION_OPENED: // Should see OPENED after OPENING but possible after PENDING_OPEN @@ -1996,8 +2017,8 @@ LOG.debug("Region has transitioned to OPENED, allowing " + "watched event handlers to process"); break; - } else if (data.getEventType() != - EventType.RS_ZK_REGION_OPENING) { + } else if (data.getEventType() != EventType.RS_ZK_REGION_OPENING + && data.getEventType() != EventType.RS_ZK_REGION_FAILED_OPEN) { LOG.warn("While timing out a region in state OPENING, " + "found ZK node in unexpected state: " + data.getEventType()); Index: src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (revision 1241786) +++ src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (working copy) @@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.executor.EventHandler; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionServerServices; import org.apache.hadoop.hbase.util.CancelableProgressable; @@ -95,13 +96,17 @@ if (!transitionZookeeperOfflineToOpening(encodedName)) { LOG.warn("Region was hijacked? It no longer exists, encodedName=" + encodedName); + tryTransitionToFailedOpen(regionInfo); return; } // Open region. After a successful open, failures in subsequent // processing needs to do a close as part of cleanup. region = openRegion(); - if (region == null) return; + if (region == null) { + tryTransitionToFailedOpen(regionInfo); + return; + } boolean failed = true; if (tickleOpening("post_region_open")) { if (updateMeta(region)) failed = false; @@ -110,10 +115,17 @@ if (failed || this.server.isStopped() || this.rsServices.isStopping()) { cleanupFailedOpen(region); + tryTransitionToFailedOpen(regionInfo); return; } if (!transitionToOpened(region)) { + // If we fail to transition to opened, it's because of one of two cases: + // (a) we lost our ZK lease + // OR (b) someone else opened the region before us + // In either case, we don't need to transition to FAILED_OPEN state. + // In case (a), the Master will process us as a dead server. In case + // (b) the region is already being handled elsewhere anyway. cleanupFailedOpen(region); return; } @@ -125,6 +137,35 @@ remove(this.regionInfo.getEncodedNameAsBytes()); } } + /** + * @param Region we're working on. + * This is not guaranteed to succeed, we just do our best. + * @return whether znode is successfully transitioned to FAILED_OPEN state. + */ + private boolean tryTransitionToFailedOpen(final HRegionInfo hri) { + boolean result = false; + final String name = hri.getRegionNameAsString(); + try { + LOG.info("Opening of region " + hri + + " failed, marking as FAILED_OPEN in ZK"); + if (ZKAssign.transitionNode( + this.server.getZooKeeper(), hri, + this.server.getServerName(), + EventType.RS_ZK_REGION_OPENING, + EventType.RS_ZK_REGION_FAILED_OPEN, + this.version) == -1) { + LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " + + "It's likely that the master already timed out this open " + + "attempt, and thus another RS already has the region."); + } else { + result = true; + } + } catch (KeeperException e) { + LOG.error("Failed transitioning node " + name + + " from OPENING to FAILED_OPEN", e); + } + return result; + } /** * Update ZK, ROOT or META. This can take a while if for example the @@ -133,7 +174,7 @@ * state meantime so master doesn't timeout our region-in-transition. * Caller must cleanup region if this fails. */ - private boolean updateMeta(final HRegion r) { + boolean updateMeta(final HRegion r) { if (this.server.isStopped() || this.rsServices.isStopping()) { return false; } @@ -285,11 +326,12 @@ return tickleOpening("open_region_progress"); } }); - } catch (IOException e) { - // We failed open. Let our znode expire in regions-in-transition and - // Master will assign elsewhere. Presumes nothing to close. + } catch (Throwable t) { + // We failed open. Our caller will see the 'null' return value + // and transition the node back to FAILED_OPEN. If that fails, + // we rely on the Timeout Monitor in the master to reassign. LOG.error("Failed open of region=" + - this.regionInfo.getRegionNameAsString(), e); + this.regionInfo.getRegionNameAsString(), t); } return region; } Index: src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (revision 1241786) +++ src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (working copy) @@ -19,6 +19,8 @@ */ package org.apache.hadoop.hbase.regionserver.handler; +import static org.junit.Assert.assertEquals; + import java.io.IOException; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -26,8 +28,11 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.executor.RegionTransitionData; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionServerServices; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.MockRegionServerServices; import org.apache.hadoop.hbase.util.MockServer; import org.apache.hadoop.hbase.zookeeper.ZKAssign; @@ -36,6 +41,7 @@ import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -44,14 +50,31 @@ */ public class TestOpenRegionHandler { private final static HBaseTestingUtility HTU = new HBaseTestingUtility(); + private static HTableDescriptor TEST_HTD; + private HRegionInfo TEST_HRI; + + private int testIndex = 0; @BeforeClass public static void before() throws Exception { HTU.startMiniZKCluster(); + TEST_HTD = new HTableDescriptor("TestOpenRegionHandler.java"); } @AfterClass public static void after() throws IOException { + TEST_HTD = null; HTU.shutdownMiniZKCluster(); } + /** + * Before each test, use a different HRI, so the different tests + * don't interfere with each other. This allows us to use just + * a single ZK cluster for the whole suite. + */ + @Before + public void setupHRI() { + TEST_HRI = new HRegionInfo(TEST_HTD, Bytes.toBytes(testIndex), Bytes + .toBytes(testIndex + 1)); + testIndex++; + } /** * Test the openregionhandler can deal with its znode being yanked out from @@ -94,4 +117,55 @@ // post OPENING; again will expect it to come back w/o NPE or exception. handler.process(); } + @Test + public void testFailedOpenRegion() throws Exception { + Server server = new MockServer(); + RegionServerServices rsServices = new MockRegionServerServices(); + + // Create it OFFLINE, which is what it expects + ZKAssign.createNodeOffline(server.getZooKeeper(), TEST_HRI, server + .getServerName()); + + // Create the handler + OpenRegionHandler handler = + new OpenRegionHandler(server, rsServices, TEST_HRI) { + @Override + HRegion openRegion() { + // Fake failure of opening a region due to an IOE, which is caught + return null; + } + }; + handler.process(); + + // Handler should have transitioned it to FAILED_OPEN + RegionTransitionData data = + ZKAssign.getData(server.getZooKeeper(), TEST_HRI.getEncodedName()); + assertEquals(EventType.RS_ZK_REGION_FAILED_OPEN, data.getEventType()); + } + + @Test + public void testFailedUpdateMeta() throws Exception { + Server server = new MockServer(); + RegionServerServices rsServices = new MockRegionServerServices(); + + // Create it OFFLINE, which is what it expects + ZKAssign.createNodeOffline(server.getZooKeeper(), TEST_HRI, server + .getServerName()); + + // Create the handler + OpenRegionHandler handler = + new OpenRegionHandler(server, rsServices, TEST_HRI) { + @Override + boolean updateMeta(final HRegion r) { + // Fake failure of updating META + return false; + } + }; + handler.process(); + + // Handler should have transitioned it to FAILED_OPEN + RegionTransitionData data = + ZKAssign.getData(server.getZooKeeper(), TEST_HRI.getEncodedName()); + assertEquals(EventType.RS_ZK_REGION_FAILED_OPEN, data.getEventType()); + } }