diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java index 7b809da..edbd34d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/YarnApplicationState.java @@ -30,9 +30,15 @@ /** Application which was just created. */ NEW, + /** Application which is being saved. */ + NEW_SAVING, + /** Application which has been submitted. */ SUBMITTED, - + + /** Application has been accepted by the scheduler */ + ACCEPTED, + /** Application which is currently running. */ RUNNING, @@ -43,8 +49,5 @@ FAILED, /** Application which was terminated by a user or admin. */ - KILLED, - - /** Application has been accepted by the scheduler */ - ACCEPTED + KILLED } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index aec162c..c13399f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -72,12 +72,13 @@ message ContainerProto { enum YarnApplicationStateProto { NEW = 1; - SUBMITTED = 2; - RUNNING = 3; - FINISHED = 4; - FAILED = 5; - KILLED = 6; - ACCEPTED = 7; + NEW_SAVING = 2; + SUBMITTED = 3; + ACCEPTED = 4; + RUNNING = 5; + FINISHED = 6; + FAILED = 7; + KILLED = 8; } enum FinalApplicationStatusProto { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 1c3c55e..bcc1f64 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -297,20 +297,6 @@ public SubmitApplicationResponse submitApplication( // So call handle directly and do not send an event. rmAppManager.handle(new RMAppManagerSubmitEvent(submissionContext, System .currentTimeMillis())); - - // If recovery is enabled then store the application information in a - // blocking call so make sure that RM has stored the information needed - // to restart the AM after RM restart without further client communication - RMStateStore stateStore = rmContext.getStateStore(); - LOG.info("Storing Application with id " + applicationId); - try { - stateStore.storeApplication(rmContext.getRMApps().get(applicationId)); - } catch (Exception e) { - // For HA this exception needs to be handled by giving up - // master status if we got fenced - LOG.error("Failed to store application:" + applicationId, e); - ExitUtil.terminate(1, e); - } LOG.info("Application with id " + applicationId.getId() + " submitted by user " + user); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java index c5d5937..4b398d4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.recovery; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -63,6 +65,11 @@ public void storeApplicationState(String appId, ApplicationState appState = new ApplicationState( appStateData.getSubmitTime(), appStateData.getApplicationSubmissionContext()); + if (state.appState.containsKey(appState.getAppId())) { + Exception e = new IOException("App: " + appId + " is already stored."); + LOG.info("Error storing info for app: " + appId, e); + throw e; + } state.appState.put(appState.getAppId(), appState); } @@ -79,6 +86,13 @@ public synchronized void storeApplicationAttemptState(String attemptIdStr, attemptState.getAttemptId().getApplicationId()); assert appState != null; + if (appState.attempts.containsKey(attemptState.getAttemptId())) { + Exception e = new IOException("Attempt: " + + attemptState.getAttemptId() + " is already stored."); + LOG.info("Error storing info for attempt: " + + attemptState.getAttemptId(), e); + throw e; + } appState.attempts.put(attemptState.getAttemptId(), attemptState); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 674a779..eab6b74 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppStoredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent; @@ -166,21 +167,19 @@ public synchronized void close() throws Exception { public abstract RMState loadState() throws Exception; /** - * Blocking API + * Non-Blocking API * ResourceManager services use this to store the application's state - * This must not be called on the dispatcher thread + * This does not block the dispatcher threads + * RMAppStoredEvent will be sent on completion to notify the RMApp */ - public synchronized void storeApplication(RMApp app) throws Exception { + @SuppressWarnings("unchecked") + public synchronized void storeApplication(RMApp app) { ApplicationSubmissionContext context = app .getApplicationSubmissionContext(); assert context instanceof ApplicationSubmissionContextPBImpl; - - ApplicationStateDataPBImpl appStateData = new ApplicationStateDataPBImpl(); - appStateData.setSubmitTime(app.getSubmitTime()); - appStateData.setApplicationSubmissionContext(context); - - LOG.info("Storing info for app: " + context.getApplicationId()); - storeApplicationState(app.getApplicationId().toString(), appStateData); + ApplicationState appState = new ApplicationState( + app.getSubmitTime(), context); + dispatcher.getEventHandler().handle(new RMStateStoreAppEvent(appState)); } /** @@ -255,6 +254,30 @@ protected abstract void removeApplicationState(ApplicationState appState) private synchronized void handleStoreEvent(RMStateStoreEvent event) { switch(event.getType()) { + case STORE_APP: + { + ApplicationState apptState = + ((RMStateStoreAppEvent) event).getAppState(); + Exception storedException = null; + ApplicationStateDataPBImpl appStateData = + new ApplicationStateDataPBImpl(); + appStateData.setSubmitTime(apptState.getSubmitTime()); + appStateData.setApplicationSubmissionContext( + apptState.getApplicationSubmissionContext()); + ApplicationId appId = + apptState.getApplicationSubmissionContext().getApplicationId(); + + LOG.info("Storing info for app: " + appId); + try { + storeApplicationState(appId.toString(), appStateData); + } catch (Exception e) { + LOG.error("Error storing app: " + appId, e); + storedException = e; + } finally { + notifyDoneStoringApplication(appId, storedException); + } + } + break; case STORE_APP_ATTEMPT: { ApplicationAttemptState attemptState = @@ -297,6 +320,20 @@ private synchronized void handleStoreEvent(RMStateStoreEvent event) { LOG.error("Unknown RMStateStoreEvent type: " + event.getType()); } } + + @SuppressWarnings("unchecked") + /** + * In (@link storeApplication}, derived class can call this method to notify + * the application about operation completion + * @param appId id of the application that has been saved + * @param storedException the exception that is thrown when storing the + * application + */ + private void notifyDoneStoringApplication(ApplicationId appId, + Exception storedException) { + rmDispatcher.getEventHandler().handle( + new RMAppStoredEvent(appId, storedException)); + } @SuppressWarnings("unchecked") /** diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreAppEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreAppEvent.java new file mode 100644 index 0000000..99f8e37 --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreAppEvent.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.recovery; + +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; + +public class RMStateStoreAppEvent extends RMStateStoreEvent { + + private final ApplicationState appState; + + public RMStateStoreAppEvent(ApplicationState appState) { + super(RMStateStoreEventType.STORE_APP); + this.appState = appState; + } + + public ApplicationState getAppState() { + return appState; + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java index 22f155c..f5383fa 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreEventType.java @@ -20,5 +20,6 @@ public enum RMStateStoreEventType { STORE_APP_ATTEMPT, + STORE_APP, REMOVE_APP } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java index 20eef1d..678f24f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java @@ -26,6 +26,7 @@ // Source: RMAppAttempt APP_REJECTED, APP_ACCEPTED, + APP_SAVED, ATTEMPT_REGISTERED, ATTEMPT_FINISHING, ATTEMPT_FINISHED, // Will send the final state diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 62a3ba7..1cdf315 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -32,6 +32,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -104,6 +105,7 @@ private static final FinalTransition FINAL_TRANSITION = new FinalTransition(); private static final AppFinishedTransition FINISHED_TRANSITION = new AppFinishedTransition(); + private boolean isRecovered; private static final StateMachineFactory { + @Override + public RMAppState transition(RMAppImpl app, RMAppEvent event) { + if (app.isRecovered) { + app.createNewAttempt(true); + return RMAppState.SUBMITTED; + } + // If recovery is enabled then store the application information in a + // non-blocking call so make sure that RM has stored the information + // needed to restart the AM after RM restart without further client + // communication + LOG.info("Storing application with id " + app.applicationId); + app.rmContext.getStateStore().storeApplication( + app.rmContext.getRMApps().get(app.applicationId)); + return RMAppState.NEW_SAVING; + } + } + private static class AppFinishedTransition extends FinalTransition { public void transition(RMAppImpl app, RMAppEvent event) { RMAppFinishedAttemptEvent finishedEvent = diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java index a9e3ce1..b7f9325 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppState.java @@ -19,5 +19,13 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; public enum RMAppState { - NEW, SUBMITTED, ACCEPTED, RUNNING, FINISHING, FINISHED, FAILED, KILLED + NEW, + NEW_SAVING, + SUBMITTED, + ACCEPTED, + RUNNING, + FINISHING, + FINISHED, + FAILED, + KILLED } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppStoredEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppStoredEvent.java new file mode 100644 index 0000000..76fb1df --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppStoredEvent.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp; + +import org.apache.hadoop.yarn.api.records.ApplicationId; + +public class RMAppStoredEvent extends RMAppEvent { + + private final Exception storedException; + + public RMAppStoredEvent(ApplicationId appId, Exception storedException) { + super(appId, RMAppEventType.APP_SAVED); + this.storedException = storedException; + } + + public Exception getStoredException() { + return storedException; + } + +} \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java index a4826e8..cec16f3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java @@ -63,6 +63,7 @@ public void scheduler() { // limit applications to those in states relevant to scheduling set(YarnWebParams.APP_STATE, StringHelper.cjoin( RMAppState.NEW.toString(), + RMAppState.NEW_SAVING.toString(), RMAppState.SUBMITTED.toString(), RMAppState.ACCEPTED.toString(), RMAppState.RUNNING.toString(), diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index 8a38278..0977ec9 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -83,7 +83,9 @@ public AppInfo(RMApp app, Boolean hasAccess) { String trackingUrl = app.getTrackingUrl(); this.state = app.getState(); this.trackingUrlIsNotReady = trackingUrl == null || trackingUrl.isEmpty() - || RMAppState.NEW == this.state || RMAppState.SUBMITTED == this.state + || RMAppState.NEW == this.state + || RMAppState.NEW_SAVING == this.state + || RMAppState.SUBMITTED == this.state || RMAppState.ACCEPTED == this.state; this.trackingUI = this.trackingUrlIsNotReady ? "UNASSIGNED" : (app .getFinishTime() == 0 ? "ApplicationMaster" : "History"); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 8e8e485..cb40372 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -18,9 +18,12 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; +import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import java.io.IOException; +import java.util.Map; import junit.framework.Assert; @@ -41,6 +44,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; @@ -131,6 +137,7 @@ public void handle(SchedulerEvent event) { } @Before + @SuppressWarnings ("unchecked") public void setUp() throws Exception { Configuration conf = new Configuration(); rmDispatcher = new DrainDispatcher(); @@ -138,8 +145,17 @@ public void setUp() throws Exception { mock(ContainerAllocationExpirer.class); AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class); AMLivelinessMonitor amFinishingMonitor = mock(AMLivelinessMonitor.class); + RMStateStore store = mock(RMStateStore.class); + RMState rmState = mock(RMState.class); + Map appStates = + mock(Map.class); + ApplicationState appState = mock(ApplicationState.class); + when(store.loadState()).thenReturn(rmState); + when(rmState.getApplicationState()).thenReturn(appStates); + when(appStates.get(any(ApplicationId.class))).thenReturn(appState); + when(appState.getAttemptCount()).thenReturn(0); this.rmContext = - new RMContextImpl(rmDispatcher, + new RMContextImpl(rmDispatcher, store, containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor, null, new ApplicationTokenSecretManager(conf), new RMContainerTokenSecretManager(conf), @@ -264,22 +280,51 @@ private static void assertFailed(RMApp application, String regex) { diag.toString().matches(regex)); } - protected RMApp testCreateAppSubmitted( + protected RMApp testCreateAppNewSaving( ApplicationSubmissionContext submissionContext) throws IOException { RMApp application = createNewTestApp(submissionContext); - // NEW => SUBMITTED event RMAppEventType.START + // NEW => NEW_SAVING event RMAppEventType.START RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.START); application.handle(event); assertStartTimeSet(application); + assertAppState(RMAppState.NEW_SAVING, application); + return application; + } + + protected RMApp testCreateAppSubmittedNoRecovery( + ApplicationSubmissionContext submissionContext) throws IOException { + RMApp application = testCreateAppNewSaving(submissionContext); + // NEW_SAVING => SUBMITTED event RMAppEventType.APP_SAVED + RMAppEvent event = + new RMAppStoredEvent(application.getApplicationId(), null); + application.handle(event); + assertStartTimeSet(application); + assertAppState(RMAppState.SUBMITTED, application); + return application; + } + + protected RMApp testCreateAppSubmittedRecovery( + ApplicationSubmissionContext submissionContext) throws IOException { + RMApp application = createNewTestApp(submissionContext); + try { + ((RMAppImpl) application).recover(rmContext.getStateStore().loadState()); + } catch (Exception e) { + Assert.fail("Exception when setting isRecovered to false."); + } + // NEW => SUBMITTED event RMAppEventType.START + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.START); + application.handle(event); + assertStartTimeSet(application); assertAppState(RMAppState.SUBMITTED, application); return application; } protected RMApp testCreateAppAccepted( ApplicationSubmissionContext submissionContext) throws IOException { - RMApp application = testCreateAppSubmitted(submissionContext); - // SUBMITTED => ACCEPTED event RMAppEventType.APP_ACCEPTED + RMApp application = testCreateAppSubmittedNoRecovery(submissionContext); + // SUBMITTED => ACCEPTED event RMAppEventType.APP_ACCEPTED RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_ACCEPTED); @@ -291,7 +336,7 @@ protected RMApp testCreateAppAccepted( protected RMApp testCreateAppRunning( ApplicationSubmissionContext submissionContext) throws IOException { - RMApp application = testCreateAppAccepted(submissionContext); + RMApp application = testCreateAppAccepted(submissionContext); // ACCEPTED => RUNNING event RMAppEventType.ATTEMPT_REGISTERED RMAppEvent event = new RMAppEvent(application.getApplicationId(), @@ -402,11 +447,38 @@ public void testAppNewReject() throws IOException { assertFailed(application, rejectedText); } - @Test - public void testAppSubmittedRejected() throws IOException { - LOG.info("--- START: testAppSubmittedRejected ---"); + @Test (timeout = 30000) + public void testAppNewSavingKill() throws IOException { + LOG.info("--- START: testAppNewSavingKill ---"); + + RMApp application = testCreateAppNewSaving(null); + // NEW_SAVING => KILLED event RMAppEventType.KILL + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + application.handle(event); + rmDispatcher.await(); + assertKilled(application); + } + + @Test (timeout = 30000) + public void testAppNewSavingReject() throws IOException { + LOG.info("--- START: testAppNewSavingReject ---"); + + RMApp application = testCreateAppNewSaving(null); + // NEW_SAVING => FAILED event RMAppEventType.APP_REJECTED + String rejectedText = "Test Application Rejected"; + RMAppEvent event = + new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + application.handle(event); + rmDispatcher.await(); + assertFailed(application, rejectedText); + } - RMApp application = testCreateAppSubmitted(null); + @Test (timeout = 30000) + public void testAppSubmittedNoRecoveryRejected() throws IOException { + LOG.info("--- START: testAppSubmittedNoRecoveryRejected ---"); + + RMApp application = testCreateAppSubmittedNoRecovery(null); // SUBMITTED => FAILED event RMAppEventType.APP_REJECTED String rejectedText = "app rejected"; RMAppEvent event = @@ -416,10 +488,41 @@ public void testAppSubmittedRejected() throws IOException { assertFailed(application, rejectedText); } - @Test - public void testAppSubmittedKill() throws IOException, InterruptedException { - LOG.info("--- START: testAppSubmittedKill---"); - RMApp application = testCreateAppSubmitted(null); + @Test (timeout = 30000) + public void testAppSubmittedNoRecoveryKill() + throws IOException, InterruptedException { + LOG.info("--- START: testAppSubmittedNoRecoveryKill---"); + RMApp application = testCreateAppSubmittedNoRecovery(null); + // SUBMITTED => KILLED event RMAppEventType.KILL + RMAppEvent event = new RMAppEvent(application.getApplicationId(), + RMAppEventType.KILL); + this.rmContext.getRMApps().putIfAbsent(application.getApplicationId(), + application); + application.handle(event); + rmDispatcher.await(); + assertKilled(application); + assertAppAndAttemptKilled(application); + } + + @Test (timeout = 30000) + public void testAppSubmittedRecoveryRejected() throws IOException { + LOG.info("--- START: testAppSubmittedRecoveryRejected ---"); + + RMApp application = testCreateAppSubmittedRecovery(null); + // SUBMITTED => FAILED event RMAppEventType.APP_REJECTED + String rejectedText = "app rejected"; + RMAppEvent event = + new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + application.handle(event); + rmDispatcher.await(); + assertFailed(application, rejectedText); + } + + @Test (timeout = 30000) + public void testAppSubmittedRecoveryKill() + throws IOException, InterruptedException { + LOG.info("--- START: testAppSubmittedRecoveryKill---"); + RMApp application = testCreateAppSubmittedRecovery(null); // SUBMITTED => KILLED event RMAppEventType.KILL RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); @@ -570,7 +673,37 @@ public void testAppFinishedFinished() throws IOException { "", diag.toString()); } - @Test + @Test (timeout = 30000) + public void testAppFailedFailed() throws IOException { + LOG.info("--- START: testAppFailedFailed ---"); + + RMApp application = testCreateAppNewSaving(null); + + // NEW_SAVING => FAILED event RMAppEventType.APP_REJECTED + RMAppEvent event = + new RMAppRejectedEvent(application.getApplicationId(), ""); + application.handle(event); + rmDispatcher.await(); + assertTimesAtFinish(application); + assertAppState(RMAppState.FAILED, application); + + // KILLED => KILLED event RMAppEventType.KILL + event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + application.handle(event); + rmDispatcher.await(); + assertTimesAtFinish(application); + assertAppState(RMAppState.FAILED, application); + + // KILLED => KILLED event RMAppEventType.APP_SAVED + event = new RMAppStoredEvent(application.getApplicationId(), null); + application.handle(event); + rmDispatcher.await(); + assertTimesAtFinish(application); + assertAppState(RMAppState.FAILED, application); + } + + @Test (timeout = 30000) public void testAppKilledKilled() throws IOException { LOG.info("--- START: testAppKilledKilled ---"); @@ -616,9 +749,16 @@ public void testAppKilledKilled() throws IOException { rmDispatcher.await(); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); + + // KILLED => KILLED event RMAppEventType.APP_SAVED + event = new RMAppStoredEvent(application.getApplicationId(), null); + application.handle(event); + rmDispatcher.await(); + assertTimesAtFinish(application); + assertAppState(RMAppState.KILLED, application); } - @Test + @Test (timeout = 30000) public void testGetAppReport() { RMApp app = createNewTestApp(null); assertAppState(RMAppState.NEW, app);