Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (revision 1448884) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (working copy) @@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionServerAccounting; import org.apache.hadoop.hbase.regionserver.RegionServerServices; -import org.apache.hadoop.hbase.regionserver.wal.HLog; import org.apache.hadoop.hbase.util.CancelableProgressable; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.zookeeper.KeeperException; @@ -87,8 +86,9 @@ @Override public void process() throws IOException { boolean openSuccessful = false; - boolean transitionToFailedOpen = false; + boolean transitionedToOpening = false; final String regionName = regionInfo.getRegionNameAsString(); + HRegion region = null; try { if (this.server.isStopped() || this.rsServices.isStopping()) { @@ -106,7 +106,6 @@ LOG.error("Region " + encodedName + " was already online when we started processing the opening. " + "Marking this new attempt as failed"); - tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, this.version); return; } @@ -115,23 +114,19 @@ // Calling transitionZookeeperOfflineToOpening initializes this.version. if (!isRegionStillOpening()){ LOG.error("Region " + encodedName + " opening cancelled"); - tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, this.version); return; } if (!transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) { LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName); // This is a desperate attempt: the znode is unlikely to be ours. But we can't do more. - tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, this.version); return; } - + transitionedToOpening = true; // Open region. After a successful open, failures in subsequent // processing needs to do a close as part of cleanup. - HRegion region = openRegion(); + region = openRegion(); if (region == null) { - tryTransitionFromOpeningToFailedOpen(regionInfo); - transitionToFailedOpen = true; return; } boolean failed = true; @@ -142,9 +137,6 @@ } if (failed || this.server.isStopped() || this.rsServices.isStopping()) { - cleanupFailedOpen(region); - tryTransitionFromOpeningToFailedOpen(regionInfo); - transitionToFailedOpen = true; return; } @@ -155,9 +147,6 @@ // OR (b) someone else opened the region before us // OR (c) someone cancelled the open // In all cases, we try to transition to failed_open to be safe. - cleanupFailedOpen(region); - tryTransitionFromOpeningToFailedOpen(regionInfo); - transitionToFailedOpen = true; return; } @@ -180,6 +169,10 @@ } finally { + // Do all clean up here + if (!openSuccessful) { + doCleanUpOnFailedOpen(region, transitionedToOpening); + } final Boolean current = this.rsServices.getRegionsInTransitionInRS(). remove(this.regionInfo.getEncodedNameAsBytes()); @@ -191,19 +184,35 @@ // 1) this.rsServices.addToOnlineRegions(region); // 2) the ZK state. if (openSuccessful) { - if (current == null) { // Should NEVER happen, but let's be paranoid. - LOG.error("Bad state: we've just opened a region that was NOT in transition. Region=" + - regionName - ); - } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're really unlucky. + if (current == null) { // Should NEVER happen, but let's be paranoid. + LOG.error("Bad state: we've just opened a region that was NOT in transition. Region=" + + regionName); + } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're + // really unlucky. LOG.error("Race condition: we've finished to open a region, while a close was requested " - + " on region=" + regionName + ". It can be a critical error, as a region that" + - " should be closed is now opened." - ); + + " on region=" + regionName + ". It can be a critical error, as a region that" + + " should be closed is now opened."); } - } else if (transitionToFailedOpen == false) { + } + } + } + + private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening) + throws IOException { + if (transitionedToOpening) { + try { + if (region != null) { + cleanupFailedOpen(region); + } + } finally { + // Even if cleanupFailed open fails we need to do this transition + // See HBASE-7698 tryTransitionFromOpeningToFailedOpen(regionInfo); } + } else { + // If still transition to OPENING is not done, we need to transition znode + // to FAILED_OPEN + tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode); } } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (revision 1448884) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java (working copy) @@ -212,5 +212,29 @@ TEST_HRI.getEncodedName())); assertEquals(EventType.RS_ZK_REGION_FAILED_OPEN, rt.getEventType()); } + + @Test + public void testTransitionToFailedOpenFromOffline() throws Exception { + Server server = new MockServer(HTU); + RegionServerServices rsServices = new MockRegionServerServices(server.getZooKeeper(), + server.getServerName()); + // Create it OFFLINE, which is what it expects + ZKAssign.createNodeOffline(server.getZooKeeper(), TEST_HRI, server.getServerName()); + // Create the handler + OpenRegionHandler handler = new OpenRegionHandler(server, rsServices, TEST_HRI, TEST_HTD) { + + @Override + boolean transitionZookeeperOfflineToOpening(String encodedName, int versionOfOfflineNode) { + return false; + } + }; + rsServices.getRegionsInTransitionInRS().put(TEST_HRI.getEncodedNameAsBytes(), Boolean.TRUE); + + handler.process(); + + RegionTransition rt = RegionTransition.parseFrom(ZKAssign.getData(server.getZooKeeper(), + TEST_HRI.getEncodedName())); + assertEquals(EventType.RS_ZK_REGION_FAILED_OPEN, rt.getEventType()); + } } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (revision 1448884) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (working copy) @@ -51,10 +51,16 @@ new ConcurrentSkipListMap(Bytes.BYTES_COMPARATOR); private HFileSystem hfs = null; private ZooKeeperWatcher zkw = null; + private ServerName serverName = null; - public MockRegionServerServices(ZooKeeperWatcher zkw){ + public MockRegionServerServices(ZooKeeperWatcher zkw) { this.zkw = zkw; } + + public MockRegionServerServices(ZooKeeperWatcher zkw, ServerName serverName) { + this.zkw = zkw; + this.serverName = serverName; + } public MockRegionServerServices(){ this(null); @@ -126,7 +132,7 @@ @Override public ServerName getServerName() { - return null; + return this.serverName; } @Override