Index: src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1300444) +++ src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -1117,9 +1117,17 @@ if (rs != null) { HRegionInfo regionInfo = rs.getRegion(); if (rs.isSplitting() || rs.isSplit()) { - LOG.debug("Ephemeral node deleted, regionserver crashed?, " + - "clearing from RIT; rs=" + rs); - clearRegionFromTransition(rs.getRegion()); + LOG.debug("Ephemeral node deleted, split rolled back or regionserver crashed?" + + ",clearing from RIT; rs=" + rs); + synchronized (this.regionsInTransition) { + this.regionsInTransition.remove(rs.getRegion().getEncodedName()); + } + if (checkIfRegionBelongsToDisabling(rs.getRegion())) { + LOG.info("The table " + rs.getRegion().getTableNameAsString() + + " was in DISABLING state, unassign region " + + rs.getRegion().getRegionNameAsString()); + unassign(rs.getRegion()); + } } else { LOG.debug("The znode of region " + regionInfo.getRegionNameAsString() + " has been deleted."); @@ -2034,28 +2042,20 @@ } catch (NotServingRegionException nsre) { LOG.info("Server " + server + " returned " + nsre + " for " + region.getRegionNameAsString()); - // Presume that master has stale data. Presume remote side just split. - // Presume that the split message when it comes in will fix up the master's - // in memory cluster state. 
+ RegionState regionState = regionsInTransition.get(encodedName); + if (regionState != null + && (regionState.isSplitting() || regionState.isSplit())) { + LOG.info("Region " + region.getRegionNameAsString() + + " is splitting, return"); + return; /* presumably the pending split message will fix up the master's in-memory state */ + } else { + cancelClosingRegionIfDisabling(region); + } } catch (Throwable t) { if (t instanceof RemoteException) { t = ((RemoteException)t).unwrapRemoteException(); if (t instanceof NotServingRegionException) { - if (checkIfRegionBelongsToDisabling(region)) { - // Remove from the regionsinTransition map - LOG.info("While trying to recover the table " - + region.getTableNameAsString() - + " to DISABLED state the region " + region - + " was offlined but the table was in DISABLING state"); - synchronized (this.regionsInTransition) { - this.regionsInTransition.remove(region.getEncodedName()); - } - // Remove from the regionsMap - synchronized (this.regions) { - this.regions.remove(region); - } - deleteClosingOrClosedNode(region); - } + cancelClosingRegionIfDisabling(region); } // RS is already processing this region, only need to update the timestamp if (t instanceof RegionAlreadyInTransitionException) { @@ -2068,7 +2068,29 @@ // Presume retry or server will expire. } } - + + /** + * Cancel closing a region whose table is disabling. 
Currently it is only called after + * region CLOSE RPC returned {@link NotServingRegionException} + * @param region + */ + private void cancelClosingRegionIfDisabling(HRegionInfo region) { + if (checkIfRegionBelongsToDisabling(region)) { + // Remove from the regionsInTransition map + LOG.info("While trying to recover the table " + + region.getTableNameAsString() + " to DISABLED state the region " + + region + " was offlined but the table was in DISABLING state"); + synchronized (this.regionsInTransition) { + this.regionsInTransition.remove(region.getEncodedName()); + } + // Remove from the regions map + synchronized (this.regions) { + this.regions.remove(region); + } + deleteClosingOrClosedNode(region); + } + } + private void deleteClosingOrClosedNode(HRegionInfo region) { try { if (!ZKAssign.deleteNode(master.getZooKeeper(), region.getEncodedName(),