Description
There are code paths in AssignmentManager (AM) where this.servers is not kept consistent with this.regions. This can cause the balancer to try to offline a region from a region server (RS) that already returned NotServingRegionException on a previous offline attempt.
In AssignmentManager.unassign(HRegionInfo, boolean):
try {
// TODO: We should consider making this look more like it does for the
// region open where we catch all throwables and never abort
if (serverManager.sendRegionClose(server, state.getRegion(),
versionOfClosingNode))
// This never happens. Currently regionserver close always return true.
LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
region.getRegionNameAsString());
} catch (NotServingRegionException nsre)
catch (Throwable t) {
if (t instanceof RemoteException) {
t = ((RemoteException)t).unwrapRemoteException();
if (t instanceof NotServingRegionException) {
if (checkIfRegionBelongsToDisabling(region)) {
// Remove from the regionsinTransition map
LOG.info("While trying to recover the table "
+ region.getTableNameAsString()
+ " to DISABLED state the region " + region
+ " was offlined but the table was in DISABLING state");
synchronized (this.regionsInTransition)
// Remove from the regionsMap
synchronized (this.regions)
deleteClosingOrClosedNode(region);
}
}
// RS is already processing this region, only need to update the timestamp
if (t instanceof RegionAlreadyInTransitionException)
}
In AssignmentManager.assign(HRegionInfo, RegionState, boolean, boolean, boolean):
synchronized (this.regions)