Index: src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (revision 1227943) +++ src/test/java/org/apache/hadoop/hbase/util/MockRegionServerServices.java (working copy) @@ -63,6 +63,7 @@ @Override public void postOpenDeployTasks(HRegion r, CatalogTracker ct, boolean daughter) throws KeeperException, IOException { + addToOnlineRegions(r); } @Override Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (working copy) @@ -2482,15 +2482,31 @@ @QosPriority(priority=HIGH_QOS) public boolean closeRegion(HRegionInfo region) throws IOException { - return closeRegion(region, true); + return closeRegion(region, true, -1); } @Override @QosPriority(priority=HIGH_QOS) + public boolean closeRegion(final HRegionInfo region, + final int versionOfClosingNode) + throws IOException { + return closeRegion(region, true, versionOfClosingNode); + } + + @Override + @QosPriority(priority=HIGH_QOS) public boolean closeRegion(HRegionInfo region, final boolean zk) throws IOException { + return closeRegion(region, zk, -1); + } + + @QosPriority(priority=HIGH_QOS) + protected boolean closeRegion(HRegionInfo region, final boolean zk, + final int versionOfClosingNode) + throws IOException { checkOpen(); - LOG.info("Received close region: " + region.getRegionNameAsString()); + LOG.info("Received close region: " + region.getRegionNameAsString() + + ". Version of ZK closing node:" + versionOfClosingNode); boolean hasit = this.onlineRegions.containsKey(region.getEncodedName()); if (!hasit) { LOG.warn("Received close for region we are not serving; " + @@ -2499,12 +2515,13 @@ + region.getRegionNameAsString() + " but we are not serving it"); } checkIfRegionInTransition(region, CLOSE); - return closeRegion(region, false, zk); + return closeRegion(region, false, zk, versionOfClosingNode); } @Override @QosPriority(priority=HIGH_QOS) - public boolean closeRegion(byte[] encodedRegionName, boolean zk) throws IOException { + public boolean closeRegion(byte[] encodedRegionName, boolean zk) + throws IOException { return closeRegion(encodedRegionName, false, zk); } @@ -2518,6 +2535,23 @@ */ protected boolean closeRegion(HRegionInfo region, final boolean abort, final boolean zk) { + return closeRegion(region, abort, zk, -1); + } + + + /** + * @param region Region to close + * @param abort True if we are aborting + * @param zk True if we are to update zk about the region close; if the close + * was orchestrated by master, then update zk. If the close is being run by + * the regionserver because its going down, don't update zk. + * @param versionOfClosingNode + * the version of znode to compare when RS transitions the znode from + * CLOSING state. + * @return True if closed a region. + */ + protected boolean closeRegion(HRegionInfo region, final boolean abort, + final boolean zk, final int versionOfClosingNode) { if (this.regionsInTransitionInRS.containsKey(region.getEncodedNameAsBytes())) { LOG.warn("Received close for region we are already opening or closing; " + region.getEncodedName()); @@ -2526,11 +2560,14 @@ this.regionsInTransitionInRS.putIfAbsent(region.getEncodedNameAsBytes(), false); CloseRegionHandler crh = null; if (region.isRootRegion()) { - crh = new CloseRootHandler(this, this, region, abort, zk); + crh = new CloseRootHandler(this, this, region, abort, zk, + versionOfClosingNode); } else if (region.isMetaRegion()) { - crh = new CloseMetaHandler(this, this, region, abort, zk); + crh = new CloseMetaHandler(this, this, region, abort, zk, + versionOfClosingNode); } else { - crh = new CloseRegionHandler(this, this, region, abort, zk); + crh = new CloseRegionHandler(this, this, region, abort, zk, + versionOfClosingNode); } this.service.submit(crh); return true; Index: src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/regionserver/handler/OpenRegionHandler.java (working copy) @@ -263,8 +263,8 @@ /** * @param r Region we're working on. - * @return Transition znode to OPENED state. - * @throws IOException + * @return whether znode is successfully transitioned to OPENED state. + * @throws IOException */ private boolean transitionToOpened(final HRegion r) throws IOException { boolean result = false; @@ -290,11 +290,11 @@ } return result; } - + /** * @param Region we're working on. * This is not guaranteed to succeed, we just do our best. - * @return Transition znode to CLOSED state. + * @return whether znode is successfully transitioned to FAILED_OPEN state. */ private boolean tryTransitionToFailedOpen(final HRegionInfo hri) { boolean result = false; Index: src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRegionHandler.java (working copy) @@ -43,6 +43,7 @@ private static final Log LOG = LogFactory.getLog(CloseRegionHandler.class); private final int FAILED = -1; + int expectedVersion = FAILED; private final RegionServerServices rsServices; @@ -61,7 +62,7 @@ // This is executed after receiving an CLOSE RPC from the master. public CloseRegionHandler(final Server server, final RegionServerServices rsServices, HRegionInfo regionInfo) { - this(server, rsServices, regionInfo, false, true); + this(server, rsServices, regionInfo, false, true, -1); } /** @@ -74,19 +75,23 @@ */ public CloseRegionHandler(final Server server, final RegionServerServices rsServices, - final HRegionInfo regionInfo, final boolean abort, final boolean zk) { - this(server, rsServices, regionInfo, abort, zk, EventType.M_RS_CLOSE_REGION); + final HRegionInfo regionInfo, final boolean abort, final boolean zk, + final int versionOfClosingNode) { + this(server, rsServices, regionInfo, abort, zk, versionOfClosingNode, + EventType.M_RS_CLOSE_REGION); } protected CloseRegionHandler(final Server server, final RegionServerServices rsServices, HRegionInfo regionInfo, - boolean abort, final boolean zk, EventType eventType) { + boolean abort, final boolean zk, final int versionOfClosingNode, + EventType eventType) { super(server, eventType); this.server = server; this.rsServices = rsServices; this.regionInfo = regionInfo; this.abort = abort; this.zk = zk; + this.expectedVersion = versionOfClosingNode; } public HRegionInfo getRegionInfo() { @@ -107,12 +112,6 @@ return; } - int expectedVersion = FAILED; - if (this.zk) { - expectedVersion = getCurrentVersion(); - if (expectedVersion == FAILED) return; - } - // Close the region try { // TODO: If we need to keep updating CLOSING stamp to prevent against @@ -137,7 +136,7 @@ this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName()); if (this.zk) { - if (setClosedState(expectedVersion, region)) { + if (setClosedState(this.expectedVersion, region)) { LOG.debug("set region closed state in zk successfully for region " + name + " sn name: " + this.server.getServerName()); } else { @@ -183,22 +182,4 @@ return true; } - /** - * Get the node's current version - * @return The expectedVersion. If -1, we failed getting the node - */ - private int getCurrentVersion() { - int expectedVersion = FAILED; - try { - if ((expectedVersion = ZKAssign.getVersion( - server.getZooKeeper(), regionInfo)) == FAILED) { - LOG.warn("Error getting node's version in CLOSING state," + - " aborting close of " + regionInfo.getRegionNameAsString()); - } - } catch (KeeperException e) { - LOG.warn("Error creating node in CLOSING state, aborting close of " + - regionInfo.getRegionNameAsString(), e); - } - return expectedVersion; - } } Index: src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRootHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRootHandler.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseRootHandler.java (working copy) @@ -30,14 +30,15 @@ // This is executed after receiving an CLOSE RPC from the master for root. public CloseRootHandler(final Server server, final RegionServerServices rsServices, HRegionInfo regionInfo) { - this(server, rsServices, regionInfo, false, true); + this(server, rsServices, regionInfo, false, true, -1); } // This is called directly by the regionserver when its determined its // shutting down. public CloseRootHandler(final Server server, final RegionServerServices rsServices, HRegionInfo regionInfo, - final boolean abort, final boolean zk) { - super(server, rsServices, regionInfo, abort, zk, EventType.M_RS_CLOSE_ROOT); + final boolean abort, final boolean zk, final int versionOfClosingNode) { + super(server, rsServices, regionInfo, abort, zk, versionOfClosingNode, + EventType.M_RS_CLOSE_ROOT); } } \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseMetaHandler.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseMetaHandler.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/regionserver/handler/CloseMetaHandler.java (working copy) @@ -30,14 +30,15 @@ // Called when master tells us shutdown a region via close rpc public CloseMetaHandler(final Server server, final RegionServerServices rsServices, final HRegionInfo regionInfo) { - this(server, rsServices, regionInfo, false, true); + this(server, rsServices, regionInfo, false, true, -1); } // Called when regionserver determines its to go down; not master orchestrated public CloseMetaHandler(final Server server, final RegionServerServices rsServices, final HRegionInfo regionInfo, - final boolean abort, final boolean zk) { - super(server, rsServices, regionInfo, abort, zk, EventType.M_RS_CLOSE_META); + final boolean abort, final boolean zk, final int versionOfClosingNode) { + super(server, rsServices, regionInfo, abort, zk, versionOfClosingNode, + EventType.M_RS_CLOSE_META); } } \ No newline at end of file Index: src/main/java/org/apache/hadoop/hbase/master/ServerManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (working copy) @@ -462,11 +462,14 @@ * have the specified region or the region is being split. * @param server server to open a region * @param region region to open + * @param versionOfClosingNode + * the version of znode to compare when RS transitions the znode from + * CLOSING state. * @return true if server acknowledged close, false if not * @throws IOException */ - public boolean sendRegionClose(ServerName server, HRegionInfo region) - throws IOException { + public boolean sendRegionClose(ServerName server, HRegionInfo region, + int versionOfClosingNode) throws IOException { if (server == null) throw new NullPointerException("Passed server is null"); HRegionInterface hri = getServerConnection(server); if (hri == null) { @@ -475,7 +478,7 @@ region.getRegionNameAsString() + " failed because no RPC connection found to this server"); } - return hri.closeRegion(region); + return hri.closeRegion(region, versionOfClosingNode); } /** Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -1754,13 +1754,20 @@ String encodedName = region.getEncodedName(); // Grab the state of this region and synchronize on it RegionState state; + int versionOfClosingNode = -1; synchronized (regionsInTransition) { state = regionsInTransition.get(encodedName); if (state == null) { // Create the znode in CLOSING state try { - ZKAssign.createNodeClosing( + versionOfClosingNode = ZKAssign.createNodeClosing( master.getZooKeeper(), region, master.getServerName()); + if (versionOfClosingNode == -1) { + LOG.debug("Attempting to unassign region " + + region.getRegionNameAsString() + " but ZK closing node " + + "can't be created."); + return; + } } catch (KeeperException e) { if (e instanceof NodeExistsException) { // Handle race between master initiated close and regionserver @@ -1811,17 +1818,18 @@ try { // TODO: We should consider making this look more like it does for the // region open where we catch all throwables and never abort - if (serverManager.sendRegionClose(server, state.getRegion())) { + if (serverManager.sendRegionClose(server, state.getRegion(), + versionOfClosingNode)) { LOG.debug("Sent CLOSE to " + server + " for region " + region.getRegionNameAsString()); return; } // This never happens. Currently regionserver close always return true. LOG.warn("Server " + server + " region CLOSE RPC returned false for " + - region.getEncodedName()); + region.getRegionNameAsString()); } catch (NotServingRegionException nsre) { LOG.info("Server " + server + " returned " + nsre + " for " + - region.getEncodedName()); + region.getRegionNameAsString()); // Presume that master has stale data. Presume remote side just split. // Presume that the split message when it comes in will fix up the master's // in memory cluster state. Index: src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (revision 1227943) +++ src/main/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (working copy) @@ -375,6 +375,19 @@ throws IOException; /** + * Closes the specified region. + * @param region region to close + * @param versionOfClosingNode + * the version of znode to compare when RS transitions the znode + * from CLOSING state. + * @return true if closing region, false if not + * @throws IOException + */ + public boolean closeRegion(final HRegionInfo region, + final int versionOfClosingNode) + throws IOException; + + /** * Closes the specified region and will use or not use ZK during the close * according to the specified flag. * @param region region to close Index: src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestCloseRegionHandler.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestCloseRegionHandler.java (revision 1228325) +++ src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestCloseRegionHandler.java (working copy) @@ -25,13 +25,25 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.RegionServerServices; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.MockRegionServerServices; import org.apache.hadoop.hbase.util.MockServer; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NodeExistsException; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; import org.mockito.Mockito; @@ -43,8 +55,33 @@ public class TestCloseRegionHandler { static final Log LOG = LogFactory.getLog(TestCloseRegionHandler.class); private final static HBaseTestingUtility HTU = new HBaseTestingUtility(); + private static final HTableDescriptor TEST_HTD = + new HTableDescriptor("TestCloseRegionHandler"); + private HRegionInfo TEST_HRI; + private int testIndex = 0; + @BeforeClass public static void before() throws Exception { + HTU.startMiniZKCluster(); + } + + @AfterClass public static void after() throws IOException { + HTU.shutdownMiniZKCluster(); + } + /** + * Before each test, use a different HRI, so the different tests + * don't interfere with each other. This allows us to use just + * a single ZK cluster for the whole suite. + */ + @Before + public void setupHRI() { + TEST_HRI = new HRegionInfo(TEST_HTD.getName(), + Bytes.toBytes(testIndex), + Bytes.toBytes(testIndex + 1)); + testIndex++; + } + + /** * Test that if we fail a flush, abort gets set on close. * @see HBASE-4270 * @throws IOException @@ -55,7 +92,7 @@ throws IOException, NodeExistsException, KeeperException { final Server server = new MockServer(HTU, false); final RegionServerServices rss = new MockRegionServerServices(); - HTableDescriptor htd = new HTableDescriptor("testFailedFlushAborts"); + HTableDescriptor htd = TEST_HTD; final HRegionInfo hri = new HRegionInfo(htd.getName(), HConstants.EMPTY_END_ROW, HConstants.EMPTY_END_ROW); @@ -74,7 +111,7 @@ // Assert the Server is NOT stopped before we call close region. assertFalse(server.isStopped()); CloseRegionHandler handler = - new CloseRegionHandler(server, rss, hri, false, false); + new CloseRegionHandler(server, rss, hri, false, false, -1); boolean throwable = false; try { handler.process(); @@ -86,6 +123,93 @@ assertTrue(server.isStopped()); } } + + /** + * Test if close region can handle ZK closing node version mismatch + * @throws IOException + * @throws NodeExistsException + * @throws KeeperException + */ + @Test public void testZKClosingNodeVersionMismatch() + throws IOException, NodeExistsException, KeeperException { + final Server server = new MockServer(HTU); + final RegionServerServices rss = new MockRegionServerServices(); + + HTableDescriptor htd = TEST_HTD; + final HRegionInfo hri = TEST_HRI; + + // open a region first so that it can be closed later + OpenRegion(server, rss, htd, hri); + + // close the region + // Create it CLOSING, which is what Master set before sending CLOSE RPC + int versionOfClosingNode = ZKAssign.createNodeClosing(server.getZooKeeper(), + hri, server.getServerName()); + + // The CloseRegionHandler will validate the expected version + // Given it is set to invalid versionOfClosingNode+1, + // CloseRegionHandler should be M_ZK_REGION_CLOSING + CloseRegionHandler handler = + new CloseRegionHandler(server, rss, hri, false, true, + versionOfClosingNode+1); + handler.process(); + + // Handler should remain in M_ZK_REGION_CLOSING + RegionTransitionData data = + ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()); + assertTrue(EventType.M_ZK_REGION_CLOSING == data.getEventType()); + } + + /** + * Test if the region can be closed properly + * @throws IOException + * @throws NodeExistsException + * @throws KeeperException + */ + @Test public void testCloseRegion() + throws IOException, NodeExistsException, KeeperException { + final Server server = new MockServer(HTU); + final RegionServerServices rss = new MockRegionServerServices(); + + HTableDescriptor htd = TEST_HTD; + HRegionInfo hri = TEST_HRI; + + // open a region first so that it can be closed later + OpenRegion(server, rss, htd, hri); + + // close the region + // Create it CLOSING, which is what Master set before sending CLOSE RPC + int versionOfClosingNode = ZKAssign.createNodeClosing(server.getZooKeeper(), + hri, server.getServerName()); + + // The CloseRegionHandler will validate the expected version + // Given it is set to correct versionOfClosingNode, + // CloseRegionHandlerit should be RS_ZK_REGION_CLOSED + CloseRegionHandler handler = + new CloseRegionHandler(server, rss, hri, false, true, + versionOfClosingNode); + handler.process(); + // Handler should have transitioned it to RS_ZK_REGION_CLOSED + RegionTransitionData data = + ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()); + assertTrue(EventType.RS_ZK_REGION_CLOSED == data.getEventType()); + } + private void OpenRegion(Server server, RegionServerServices rss, + HTableDescriptor htd, HRegionInfo hri) + throws IOException, NodeExistsException, KeeperException { + // Create it OFFLINE node, which is what Master set before sending OPEN RPC + ZKAssign.createNodeOffline(server.getZooKeeper(), hri, + server.getServerName()); + OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri, + htd); + openHandler.process(); + RegionTransitionData data = + ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName()); + + // delete the node, which is what Master do after the region is opened + ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(), + EventType.RS_ZK_REGION_OPENED); + } @org.junit.Rule public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = Index: src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (revision 1228375) +++ src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (working copy) @@ -101,7 +101,8 @@ // First amend the servermanager mock so that when we do send close of the // first meta region on RANDOM_SERVERNAME, it will return true rather than // default null. - Mockito.when(this.serverManager.sendRegionClose(RANDOM_SERVERNAME, hri)).thenReturn(true); + Mockito.when(this.serverManager.sendRegionClose(RANDOM_SERVERNAME, hri, -1)) + .thenReturn(true); // Create an AM. AssignmentManager am = new AssignmentManager(this.server, this.serverManager, this.ct, this.executor);