diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index c065b60..c7402b6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -487,8 +487,17 @@ public void recover(RMState state) throws Exception { Map appStates = state.getApplicationState(); LOG.info("Recovering " + appStates.size() + " applications"); - for (ApplicationStateData appState : appStates.values()) { - recoverApplication(appState, state); + + int count = 0; + + try { + for (ApplicationStateData appState : appStates.values()) { + recoverApplication(appState, state); + count += 1; + } + } finally { + LOG.info("Successfully recovered " + count + " out of " + + appStates.size() + " applications"); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 0fdc311..0b5ec4a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -121,6 +121,10 @@ private
static final Log LOG = LogFactory.getLog(RMAppImpl.class); private static final String UNAVAILABLE = "N/A"; + private static final String STATE_CHANGE_MESSAGE = + "%s State change from %s to %s on event=%s"; + private static final String RECOVERY_MESSAGE = + "Recovering app: %s with %d attempts and final state = %s"; // Immutable fields private final ApplicationId applicationId; @@ -875,9 +879,16 @@ public void handle(RMAppEvent event) { /* TODO fail the application on the failed transition */ } - if (oldState != getState()) { - LOG.info(appID + " State change from " + oldState + " to " - + getState() + " on event=" + event.getType()); + // On a state change, log at INFO unless this is a RECOVER event and + // recoveredFinalState is non-null; log that case at DEBUG, if enabled. + if ((oldState != getState()) && + (((recoveredFinalState == null)) || + (event.getType() != RMAppEventType.RECOVER))) { + LOG.info(String.format(STATE_CHANGE_MESSAGE, appID, oldState, + getState(), event.getType())); + } else if ((oldState != getState()) && LOG.isDebugEnabled()) { + LOG.debug(String.format(STATE_CHANGE_MESSAGE, appID, oldState, + getState(), event.getType())); } } finally { this.writeLock.unlock(); @@ -889,9 +900,15 @@ public void recover(RMState state) { ApplicationStateData appState = state.getApplicationState().get(getApplicationId()); this.recoveredFinalState = appState.getState(); - LOG.info("Recovering app: " + getApplicationId() + " with " + - + appState.getAttemptCount() + " attempts and final state = " - + this.recoveredFinalState ); + + if (recoveredFinalState == null) { + LOG.info(String.format(RECOVERY_MESSAGE, getApplicationId(), + appState.getAttemptCount(), "NONE")); + } else if (LOG.isDebugEnabled()) { + LOG.debug(String.format(RECOVERY_MESSAGE, getApplicationId(), + appState.getAttemptCount(), recoveredFinalState)); + } + this.diagnostics.append(null == appState.getDiagnostics() ?
"" : appState .getDiagnostics()); this.storedFinishTime = appState.getFinishTime(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 5b78eb8..609727f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -114,6 +114,10 @@ @SuppressWarnings({"unchecked", "rawtypes"}) public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { + private static final String STATE_CHANGE_MESSAGE = + "%s State change from %s to %s"; + private static final String RECOVERY_MESSAGE = + "Recovering attempt: %s with final state: %s"; private static final Log LOG = LogFactory.getLog(RMAppAttemptImpl.class); @@ -867,9 +871,16 @@ public void handle(RMAppAttemptEvent event) { /* TODO fail the application on the failed transition */ } - if (oldState != getAppAttemptState()) { - LOG.info(appAttemptID + " State change from " + oldState + " to " - + getAppAttemptState()); + // On a state change, log at INFO unless this is a RECOVER event and + // recoveredFinalState is non-null; log that case at DEBUG, if enabled.
+ if ((oldState != getAppAttemptState()) && + ((recoveredFinalState == null) || + (event.getType() != RMAppAttemptEventType.RECOVER))) { + LOG.info(String.format(STATE_CHANGE_MESSAGE, appAttemptID, oldState, + getAppAttemptState())); + } else if ((oldState != getAppAttemptState()) && LOG.isDebugEnabled()) { + LOG.debug(String.format(STATE_CHANGE_MESSAGE, appAttemptID, oldState, + getAppAttemptState())); } } finally { this.writeLock.unlock(); @@ -902,8 +913,14 @@ public void recover(RMState state) { ApplicationAttemptStateData attemptState = appState.getAttempt(getAppAttemptId()); assert attemptState != null; - LOG.info("Recovering attempt: " + getAppAttemptId() + " with final state: " - + attemptState.getState()); + + if (attemptState.getState() == null) { + LOG.info(String.format(RECOVERY_MESSAGE, getAppAttemptId(), "NONE")); + } else if (LOG.isDebugEnabled()) { + LOG.debug(String.format(RECOVERY_MESSAGE, getAppAttemptId(), + attemptState.getState())); + } + diagnostics.append("Attempt recovered after RM restart"); diagnostics.append(attemptState.getDiagnostics()); this.amContainerExitStatus = attemptState.getAMContainerExitStatus();