Reviewer annotation. Text ahead of the first "diff --git" line is commentary
and is not part of any hunk.

Scope: one hunk in RMAppManager.java, touching recoverApplication(...).

What the hunk changes:
  * The throws clause of recoverApplication narrows from Exception to
    YarnException.
  * The createAndPopulateNewRMApp(...) call and the dispatch of
    RMAppRecoverEvent move inside a try block that catches Exception.
  * When YarnConfiguration.shouldRMFailFast(conf) returns false, the handler
    removes appId from rmContext.getRMApps() and reports the failure through
    LOG.error(...) with the exception attached; nothing is rethrown, so the
    method returns normally.
  * When it returns true, the caught exception is rethrown unchanged.

NOTE(review): "throw ex" under "throws YarnException" only compiles if the
checked exceptions the try body can raise are limited to YarnException.
Presumably that holds for createAndPopulateNewRMApp -- confirm against its
signature, which is outside this hunk.

NOTE(review): recoverApplication is protected; narrowing its throws clause
could affect overriding subclasses that declare a broader exception. None are
visible here -- confirm.

NOTE(review): this patch arrived with its line breaks collapsed. The layout
below was rebuilt from the hunk header counts (-310,17 +310,30); indentation
assumes the 2-space style of the target file. Verify against the target
before applying.

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index 711a7a7..8a699c4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -310,17 +310,30 @@ protected void submitApplication(
   }
 
   protected void recoverApplication(ApplicationStateData appState,
-      RMState rmState) throws Exception {
+      RMState rmState) throws YarnException {
     ApplicationSubmissionContext appContext =
         appState.getApplicationSubmissionContext();
     ApplicationId appId = appContext.getApplicationId();
 
     // create and recover app.
-    RMAppImpl application =
-        createAndPopulateNewRMApp(appContext, appState.getSubmitTime(),
+    try {
+      RMAppImpl application =
+          createAndPopulateNewRMApp(appContext, appState.getSubmitTime(),
             appState.getUser(), true);
 
-    application.handle(new RMAppRecoverEvent(appId, rmState));
+      application.handle(new RMAppRecoverEvent(appId, rmState));
+    } catch (Exception ex) {
+      if (!YarnConfiguration.shouldRMFailFast(conf)) {
+        // If an app recovery fails, and we don't want to cause the RM startup
+        // to fail, make sure the state is clean and log an error.
+        rmContext.getRMApps().remove(appId);
+
+        LOG.error("Failed to recover application " + appId, ex);
+      } else {
+        // Otherwise, let the exception bring the RM down.
+        throw ex;
+      }
+    }
   }
 
   private RMAppImpl createAndPopulateNewRMApp(