Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1211314) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -59,6 +59,7 @@ import org.apache.hadoop.hbase.catalog.RootLocationEditor; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.executor.RegionTransitionData; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; @@ -83,6 +84,7 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.zookeeper.AsyncCallback; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.apache.zookeeper.data.Stat; @@ -1739,6 +1741,7 @@ // TODO: Method needs refactoring. Ugly buried returns throughout. Beware! LOG.debug("Starting unassignment of region " + region.getRegionNameAsString() + " (offlining)"); + synchronized (this.regions) { // Check if this region is currently assigned if (!regions.containsKey(region)) { @@ -1799,7 +1802,8 @@ "already in transition (" + state.getState() + ", force=" + force + ")"); return; } - } + } + // Send CLOSE RPC ServerName server = null; synchronized (this.regions) { @@ -1823,6 +1827,15 @@ // Presume that the split message when it comes in will fix up the master's // in memory cluster state. 
} catch (Throwable t) { + /* On a NullPointerException: remove the region from regionsInTransition and delete its CLOSING znode; a KeeperException from that delete aborts the master and returns. NOTE(review): presumably the NPE arises when no server is known for the region - confirm, and consider an explicit null check rather than reacting to the NPE. */ if (t instanceof NullPointerException) { + removeRegionInTransition(region); + try { + deleteClosingNode(region); + } catch (KeeperException ke) { + master.abort("Unexpected ZK exception deleting node CLOSING", ke); + return; + } + } if (t instanceof RemoteException) { t = ((RemoteException)t).unwrapRemoteException(); if (t instanceof NotServingRegionException) { @@ -1832,13 +1845,15 @@ + region.getTableNameAsString() + " to DISABLED state the region " + region + " was offlined but the table was in DISABLING state"); - synchronized (this.regionsInTransition) { - this.regionsInTransition.remove(region.getEncodedName()); + removeRegionInTransition(region); + removeFromOnlineRegions(region); + try { + deleteClosingNode(region); + } catch (KeeperException ke) { + master.abort("Unexpected ZK exception deleting node CLOSING", ke); + return; } - // Remove from the regionsMap - synchronized (this.regions) { - this.regions.remove(region); - } + } } // RS is already processing this region, only need to update the timestamp @@ -1853,6 +1868,29 @@ } } + /** Removes the given region from {@code regions}, synchronizing on {@code regions} for the removal. @param region region to remove */ private void removeFromOnlineRegions(HRegionInfo region) { + // Remove from the regionsMap + synchronized (this.regions) { + this.regions.remove(region); + } + } + + /** Removes the entry keyed by the region's encoded name from {@code regionsInTransition}, synchronizing on {@code regionsInTransition} for the removal. @param region region whose encoded name is removed */ private void removeRegionInTransition(HRegionInfo region) { + synchronized (this.regionsInTransition) { + this.regionsInTransition.remove(region.getEncodedName()); + } + } + + /** Asks {@code ZKAssign.deleteNode} to delete the znode for the region's encoded name with expected event type {@code M_ZK_REGION_CLOSING}. A {@code NoNodeException} is logged at debug level and not rethrown. @param region region whose CLOSING node is deleted @throws KeeperException any other ZooKeeper failure raised by the delete */ private void deleteClosingNode(HRegionInfo region) throws KeeperException { + try { + ZKAssign.deleteNode(master.getZooKeeper(), region.getEncodedName(), + EventHandler.EventType.M_ZK_REGION_CLOSING); + } catch (NoNodeException e) { + LOG.debug("CLOSING node for the region " + + region.getEncodedName() + " already deleted"); + } + } + /** * @param path * @return True if znode is in SPLIT or SPLITTING state. @@ -2437,9 +2475,7 @@ * @param hri Region to remove. 
*/ public void clearRegionFromTransition(HRegionInfo hri) { - synchronized (this.regionsInTransition) { - this.regionsInTransition.remove(hri.getEncodedName()); - } + removeRegionInTransition(hri); synchronized (this.regions) { this.regions.remove(hri); for (Set regions : this.servers.values()) {