Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1369721) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -1539,12 +1539,13 @@ private void assign(final HRegionInfo region, final RegionState state, final boolean setOfflineInZK, final boolean forceNewPlan, boolean hijack) { + boolean isRITException = false; for (int i = 0; i < this.maximumAssignmentAttempts; i++) { int versionOfOfflineNode = -1; if (setOfflineInZK) { // get the version of the znode after setting it to OFFLINE. // versionOfOfflineNode will be -1 if the znode was not set to OFFLINE - versionOfOfflineNode = setOfflineInZooKeeper(state, hijack); + versionOfOfflineNode = setOfflineInZooKeeper(state, hijack, isRITException); if (versionOfOfflineNode != -1) { if (isDisabledorDisablingRegionInRIT(region)) { return; @@ -1597,23 +1598,32 @@ if (t instanceof RemoteException) { t = ((RemoteException) t).unwrapRemoteException(); if (t instanceof RegionAlreadyInTransitionException) { - String errorMsg = "Failed assignment in: " + plan.getDestination() - + " due to " + t.getMessage(); - LOG.error(errorMsg, t); - return; + isRITException = true; + if (LOG.isDebugEnabled()) { + LOG.debug(t.getMessage()); + } } } - LOG.warn("Failed assignment of " + - state.getRegion().getRegionNameAsString() + " to " + - plan.getDestination() + ", trying to assign elsewhere instead; " + - "retry=" + i, t); + LOG.warn("Failed assignment of " + + state.getRegion().getRegionNameAsString() + + " to " + + plan.getDestination() + + ", trying to assign " + + (isRITException ? "to the same region server because of RITException;" + : "elsewhere instead; ") + "retry=" + i, t); // Clean out plan we failed execute and one that doesn't look like it'll // succeed anyways; we need a new plan! // Transition back to OFFLINE regionStates.updateRegionState( state.getRegion(), RegionState.State.OFFLINE); - // Force a new plan and reassign. Will return null if no servers. - if (getRegionPlan(state, plan.getDestination(), true) == null) { + // If region opened on destination of present plan and reassign to new RS may cause + // double assignments. In case of RITException reassigning to same RS. + RegionPlan newPlan = plan; + if (!isRITException) { + // Force a new plan and reassign. Will return null if no servers. + newPlan = getRegionPlan(state, plan.getDestination(), true); + } + if (newPlan == null) { this.timeoutMonitor.setAllRegionServersOffline(true); LOG.warn("Unable to find a viable location to assign region " + state.getRegion().getRegionNameAsString()); @@ -1664,17 +1674,22 @@ * @param state * @param hijack * - true if needs to be hijacked and reassigned, false otherwise. + * @param isRITException + * - true if we need to retry assignment because of RITException. * @return the version of the offline node if setting of the OFFLINE node was * successful, -1 otherwise. */ - int setOfflineInZooKeeper(final RegionState state, - boolean hijack) { + int setOfflineInZooKeeper(final RegionState state, boolean hijack, boolean isRITException) { // In case of reassignment the current state in memory need not be // OFFLINE. if (!hijack && !state.isClosed() && !state.isOffline()) { - String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE."; - this.master.abort(msg, new IllegalStateException(msg)); - return -1; + if (!isRITException) { + String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE."; + this.master.abort(msg, new IllegalStateException(msg)); + return -1; + } else { + LOG.debug("Unexpected state : " + state + " but retrying to assign because RITException."); + } } boolean allowZNodeCreation = false; // Under reassignment if the current state is PENDING_OPEN