diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 7855042..43401f4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -403,8 +403,17 @@ public void recover(RMState state) throws Exception { Map appStates = state.getApplicationState(); LOG.info("Recovering " + appStates.size() + " applications"); for (ApplicationState appState : appStates.values()) { - submitApplication(appState.getApplicationSubmissionContext(), - appState.getSubmitTime(), appState.getUser(), true, state); + ApplicationSubmissionContext context = + appState.getApplicationSubmissionContext(); + + // TODO (YARN-1823): Recover unmanaged AMs as well + // Without work-preserving restart, there is no point recovering + // unmanaged AMs since we can't restart the actual AM. Recover only + // managed AMs for now. 
+ if (!context.getUnmanagedAM()) { + submitApplication(context, appState.getSubmitTime(), + appState.getUser(), true, state); + } } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index ad2e17f..1d35865 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -250,10 +250,11 @@ public void testRMRestart() throws Exception { // verify load of old state - // 4 apps are loaded. + // 3 apps are loaded. // FINISHED app and attempt is also loaded back. - // Unmanaged app state is still loaded back but it cannot be restarted by - // the RM. this will change with work preserving RM restart in which AMs/NMs + + // TODO (YARN-1823): Unmanaged app state is not loaded back, but this + // will change with work preserving RM restart in which AMs/NMs // are not rebooted. - Assert.assertEquals(4, rm2.getRMContext().getRMApps().size()); + Assert.assertEquals(3, rm2.getRMContext().getRMApps().size()); // check that earlier finished app and attempt is also loaded back and move // to finished state. rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);