Reduce ResourceManager log noise while recovering from the state store.

Applications and attempts that are recovered with a final state already
stored are reported at DEBUG. Recoveries that have no final state, and all
state changes not triggered by a RECOVER event, are still reported at INFO.
RMAppManager also reports how many applications were recovered; the count is
logged from a finally block, so it is emitted even if recovery throws.

diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index c065b60..c7402b6 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -487,8 +487,17 @@ public void recover(RMState state) throws Exception {
     Map<ApplicationId, ApplicationStateData> appStates =
         state.getApplicationState();
     LOG.info("Recovering " + appStates.size() + " applications");
-    for (ApplicationStateData appState : appStates.values()) {
-      recoverApplication(appState, state);
+
+    int count = 0;
+
+    try {
+      for (ApplicationStateData appState : appStates.values()) {
+        recoverApplication(appState, state);
+        count += 1;
+      }
+    } finally {
+      LOG.info("Successfully recovered " + count + " out of "
+          + appStates.size() + " applications");
     }
   }
 
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 0fdc311..9ffe60b 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -875,9 +875,20 @@ public void handle(RMAppEvent event) {
         /* TODO fail the application on the failed transition */
       }
 
-      if (oldState != getState()) {
-        LOG.info(appID + " State change from " + oldState + " to "
-            + getState() + " on event=" + event.getType());
+      if ((oldState != getState()) &&
+          ((event.getType() != RMAppEventType.RECOVER) ||
+           (recoveredFinalState == null) || LOG.isDebugEnabled())) {
+        String message = appID + " State change from " + oldState + " to "
+            + getState() + " on event=" + event.getType();
+
+        // Log at INFO if we're not recovering or not in a terminal state.
+        // Log at DEBUG otherwise.
+        if ((event.getType() != RMAppEventType.RECOVER) ||
+            (recoveredFinalState == null)) {
+          LOG.info(message);
+        } else {
+          LOG.debug(message);
+        }
       }
     } finally {
       this.writeLock.unlock();
@@ -889,9 +900,16 @@ public void recover(RMState state) {
     ApplicationStateData appState =
         state.getApplicationState().get(getApplicationId());
     this.recoveredFinalState = appState.getState();
-    LOG.info("Recovering app: " + getApplicationId() + " with " + 
-        + appState.getAttemptCount() + " attempts and final state = "
-        + this.recoveredFinalState );
+
+    if (recoveredFinalState == null) {
+      LOG.info("Recovering app: " + getApplicationId() + " with "
+          + appState.getAttemptCount() + " attempts and no final state");
+    } else if (LOG.isDebugEnabled()) {
+      LOG.debug("Recovering app: " + getApplicationId() + " with "
+          + appState.getAttemptCount() + " attempts and final state = "
+          + this.recoveredFinalState );
+    }
+
     this.diagnostics.append(null == appState.getDiagnostics() ? "" : appState
         .getDiagnostics());
     this.storedFinishTime = appState.getFinishTime();
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 5b78eb8..7011604 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -867,9 +867,20 @@ public void handle(RMAppAttemptEvent event) {
         /* TODO fail the application on the failed transition */
       }
 
-      if (oldState != getAppAttemptState()) {
-        LOG.info(appAttemptID + " State change from " + oldState + " to "
-            + getAppAttemptState());
+      if ((oldState != getAppAttemptState()) &&
+          ((event.getType() != RMAppAttemptEventType.RECOVER) ||
+           (recoveredFinalState == null) || LOG.isDebugEnabled())) {
+        String message = appAttemptID + " State change from " + oldState
+            + " to " + getAppAttemptState();
+
+        // Log at INFO if we're not recovering or not in a terminal state.
+        // Log at DEBUG otherwise.
+        if ((event.getType() != RMAppAttemptEventType.RECOVER) ||
+            (recoveredFinalState == null)) {
+          LOG.info(message);
+        } else {
+          LOG.debug(message);
+        }
       }
     } finally {
       this.writeLock.unlock();
@@ -902,8 +913,15 @@ public void recover(RMState state) {
     ApplicationAttemptStateData attemptState =
         appState.getAttempt(getAppAttemptId());
     assert attemptState != null;
-    LOG.info("Recovering attempt: " + getAppAttemptId() + " with final state: "
-        + attemptState.getState());
+
+    if (attemptState.getState() == null) {
+      LOG.info("Recovering attempt: " + getAppAttemptId()
+          + " with no final state");
+    } else if (LOG.isDebugEnabled()) {
+      LOG.debug("Recovering attempt: " + getAppAttemptId()
+          + " with final state: " + attemptState.getState());
+    }
+
     diagnostics.append("Attempt recovered after RM restart");
     diagnostics.append(attemptState.getDiagnostics());
     this.amContainerExitStatus = attemptState.getAMContainerExitStatus();