Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1229039) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -60,9 +60,11 @@ import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; +import org.apache.hadoop.hbase.master.AssignmentManager.RegionState.State; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; @@ -84,6 +86,7 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.zookeeper.AsyncCallback; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.apache.zookeeper.data.Stat; @@ -1741,6 +1744,7 @@ // TODO: Method needs refactoring. Ugly buried returns throughout. Beware! 
LOG.debug("Starting unassignment of region " + region.getRegionNameAsString() + " (offlining)"); + synchronized (this.regions) { // Check if this region is currently assigned if (!regions.containsKey(region)) { @@ -1808,12 +1812,37 @@ "already in transition (" + state.getState() + ", force=" + force + ")"); return; } - } + } // Send CLOSE RPC ServerName server = null; synchronized (this.regions) { server = regions.get(region); } + // ClosedRegionHandler can remove the server from this.regions + if (server == null) { + // The disable flow may already have removed the region from RIT. + synchronized (regionsInTransition) { + state = regionsInTransition.get(encodedName); + if (state != null) { + // Remove only if the state is PENDING_CLOSE or CLOSING. + State presentState = state.getState(); + if (presentState == State.PENDING_CLOSE + || presentState == State.CLOSING) { + this.regionsInTransition.remove(encodedName); + } + } + } + try { + // Delete the node; if no node exists there is nothing to do. + deleteClosingOrClosedNode(region); + } catch (KeeperException e) { + master.abort( + "Unexpected ZK exception deleting node CLOSING/CLOSED for the region " + + region.getEncodedName(), e); + return; + } + return; + } try { // TODO: We should consider making this look more like it does for the // region open where we catch all throwables and never abort @@ -1849,6 +1878,15 @@ synchronized (this.regions) { this.regions.remove(region); } + try { + deleteClosingOrClosedNode(region); + } catch (KeeperException ke) { + master.abort( + "Unexpected ZK exception deleting node CLOSING/CLOSED for the region " + + region.getEncodedName(), ke); + return; + } + } } // RS is already processing this region, only need to update the timestamp @@ -1862,6 +1900,20 @@ // Presume retry or server will expire. 
} } + /** Deletes the region's ZK node via ZKAssign.deleteNode: tries M_ZK_REGION_CLOSING, then RS_ZK_REGION_CLOSED if the first call returns false. NoNodeException is logged at debug level and ignored; other KeeperExceptions propagate. */ + private void deleteClosingOrClosedNode(HRegionInfo region) + throws KeeperException { + try { + if (!ZKAssign.deleteNode(master.getZooKeeper(), region.getEncodedName(), + EventHandler.EventType.M_ZK_REGION_CLOSING)) { + ZKAssign.deleteNode(master.getZooKeeper(), region.getEncodedName(), + EventHandler.EventType.RS_ZK_REGION_CLOSED); /* NOTE(review): the result of this second delete is discarded -- confirm a false return needs no handling */ + } + } catch (NoNodeException e) { + LOG.debug("CLOSING/CLOSED node for the region " + region.getEncodedName() + + " already deleted"); + } + } /** * @param path