diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index b6ca684..1c9d08e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -1109,7 +1109,7 @@ private int getNumNonPreemptedAppAttempts() {
int completedAttempts = 0;
// Do not count AM preemption as attempt failure.
for (RMAppAttempt attempt : attempts.values()) {
- if (!attempt.isPreempted()) {
+ if (!attempt.shouldNotCountFailureToAttemptLimit()) {
completedAttempts++;
}
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
index 42c37a9..884a2bd 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
@@ -197,8 +197,14 @@
ApplicationAttemptReport createApplicationAttemptReport();
/**
- * Return the flag which indicates whether the attempt is preempted by the
- * scheduler.
- */
- boolean isPreempted();
+ * Return the flag which indicates whether the attempt failure should NOT
+ * be counted towards the attempt limit.
+ *
+ * These failure types should not be counted to Attempt Limits:
+ * - preempted by the scheduler.
+ * - hardware failures, such as NM failing, lost NM and NM disk errors.
+ * - killed by RM because of RM restart or failover.
+ *
+ */
+ boolean shouldNotCountFailureToAttemptLimit();
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 4ac64ef..97f30bd 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -1083,7 +1083,7 @@ public void transition(RMAppAttemptImpl appAttempt,
.getKeepContainersAcrossApplicationAttempts()
&& !appAttempt.submissionContext.getUnmanagedAM()) {
// See if we should retain containers for non-unmanaged applications
- if (appAttempt.isPreempted()) {
+ if (appAttempt.shouldNotCountFailureToAttemptLimit()) {
// Premption doesn't count towards app-failures and so we should
// retain containers.
keepContainersAcrossAppAttempts = true;
@@ -1132,8 +1132,18 @@ public void transition(RMAppAttemptImpl appAttempt,
}
@Override
- public boolean isPreempted() {
- return getAMContainerExitStatus() == ContainerExitStatus.PREEMPTED;
+ public boolean shouldNotCountFailureToAttemptLimit() {
+   // Lock before the try so finally never unlocks a lock we do not hold.
+   this.readLock.lock();
+   try {
+     int exitStatus = getAMContainerExitStatus();
+     return exitStatus == ContainerExitStatus.PREEMPTED
+         || exitStatus == ContainerExitStatus.ABORTED
+         || exitStatus == ContainerExitStatus.DISKS_FAILED
+         || exitStatus == ContainerExitStatus.KILLED_BY_RESOURCEMANAGER;
+   } finally {
+     this.readLock.unlock();
+   }
+ }
private static final class UnmanagedAMAttemptSavedTransition
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMFailureCount.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMFailureCount.java
new file mode 100644
index 0000000..8e23b44
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMFailureCount.java
@@ -0,0 +1,201 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager;
+
+import java.util.Collections;
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
+import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
+import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.apache.hadoop.yarn.util.Records;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestAMFailureCount {
+
+ // AM failures caused by preemption or an NM disk failure should not be
+ // counted towards the AM max retry count; a normal AM failure should be.
+ @Test(timeout = 50000)
+ public void testShouldNotCountFailureToAttemptLimit() throws Exception {
+ YarnConfiguration conf = new YarnConfiguration();
+ conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+ ResourceScheduler.class);
+ // Explicitly set the max AM attempt count to 1.
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
+ MockRM rm1 = new MockRM(conf);
+ rm1.start();
+ MockNM nm1 =
+ new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
+ nm1.registerNode();
+ RMApp app1 = rm1.submitApp(200);
+ RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ CapacityScheduler scheduler =
+ (CapacityScheduler) rm1.getResourceScheduler();
+ ContainerId amContainer =
+ ContainerId.newInstance(am1.getApplicationAttemptId(), 1);
+ // Preempt the first attempt by killing its AM container.
+ scheduler.killContainer(scheduler.getRMContainer(amContainer));
+
+ am1.waitForState(RMAppAttemptState.FAILED);
+ Assert.assertTrue(attempt1.shouldNotCountFailureToAttemptLimit());
+ rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ // AM should be restarted even though max-am-attempt is 1.
+ MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
+ Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
+
+ // Preempt the second attempt the same way.
+ ContainerId amContainer2 =
+ ContainerId.newInstance(am2.getApplicationAttemptId(), 1);
+ scheduler.killContainer(scheduler.getRMContainer(amContainer2));
+
+ am2.waitForState(RMAppAttemptState.FAILED);
+ Assert.assertTrue(attempt2.shouldNotCountFailureToAttemptLimit());
+ rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
+ Assert.assertTrue(((RMAppAttemptImpl) attempt3).mayBeLastAttempt());
+
+ // Mimic an NM disk failure: finish the AM container with DISKS_FAILED.
+ ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class);
+ containerStatus.setContainerId(attempt3.getMasterContainer().getId());
+ containerStatus.setDiagnostics("mimic NM disk_failure");
+ containerStatus.setState(ContainerState.COMPLETE);
+ containerStatus.setExitStatus(ContainerExitStatus.DISKS_FAILED);
+ RMAppAttemptContainerFinishedEvent event =
+ new RMAppAttemptContainerFinishedEvent(attempt3.getAppAttemptId(), containerStatus);
+ attempt3.handle(event);
+ am3.waitForState(RMAppAttemptState.FAILED);
+ Assert.assertTrue(attempt3.shouldNotCountFailureToAttemptLimit());
+
+ rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ MockAM am4 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ RMAppAttempt attempt4 = app1.getCurrentAppAttempt();
+ Assert.assertTrue(((RMAppAttemptImpl) attempt4).mayBeLastAttempt());
+
+ // Fail the AM normally; this failure is expected to count towards the limit.
+ nm1.nodeHeartbeat(am4.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ am4.waitForState(RMAppAttemptState.FAILED);
+ Assert.assertFalse(attempt4.shouldNotCountFailureToAttemptLimit());
+
+ // AM should not be restarted: the app fails with four attempts in total.
+ rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED);
+ Assert.assertEquals(4, app1.getAppAttempts().size());
+ rm1.stop();
+ }
+
+ // Test that an AM preemption followed by an RM restart (where the running
+ // AM container is reported as KILLED_BY_RESOURCEMANAGER) does not count
+ // either AM failure towards the max-retry count, so the new RM is able to
+ // re-launch the AM.
+ @Test(timeout = 50000)
+ public void testshouldNotCountFailureToAttemptLimitOnRMRestart()
+ throws Exception {
+ YarnConfiguration conf = new YarnConfiguration();
+ conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+ ResourceScheduler.class);
+ conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
+ conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
+ // Explicitly set the max AM attempt count to 1.
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
+ MemoryRMStateStore memStore = new MemoryRMStateStore();
+ memStore.init(conf);
+
+ MockRM rm1 = new MockRM(conf, memStore);
+ rm1.start();
+ MockNM nm1 =
+ new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
+ nm1.registerNode();
+ RMApp app1 = rm1.submitApp(200);
+ RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ CapacityScheduler scheduler =
+ (CapacityScheduler) rm1.getResourceScheduler();
+ ContainerId amContainer =
+ ContainerId.newInstance(am1.getApplicationAttemptId(), 1);
+
+ // Forcibly preempt the AM container.
+ scheduler.killContainer(scheduler.getRMContainer(amContainer));
+
+ am1.waitForState(RMAppAttemptState.FAILED);
+ Assert.assertTrue(attempt1.shouldNotCountFailureToAttemptLimit());
+ rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+
+ // State store has 1 attempt stored.
+ ApplicationState appState =
+ memStore.getState().getApplicationState().get(app1.getApplicationId());
+ Assert.assertEquals(1, appState.getAttemptCount());
+ // The stored attempt has the preempted container exit status.
+ Assert.assertEquals(ContainerExitStatus.PREEMPTED,
+ appState.getAttempt(am1.getApplicationAttemptId())
+ .getAMContainerExitStatus());
+
+ // AM should be restarted even though max-am-attempt is 1.
+ MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
+ Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
+
+ // Restart the RM on the same state store.
+ MockRM rm2 = new MockRM(conf, memStore);
+ rm2.start();
+
+ // Re-register the NM, reporting attempt 2's AM container as completed.
+ nm1.setResourceTrackerService(rm2.getResourceTrackerService());
+ NMContainerStatus status = Records.newRecord(NMContainerStatus.class);
+ status
+ .setContainerExitStatus(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
+ status.setContainerId(attempt2.getMasterContainer().getId());
+ status.setContainerState(ContainerState.COMPLETE);
+ status.setDiagnostics("");
+ nm1.registerNode(Collections.singletonList(status), null);
+
+ rm2.waitForState(attempt2.getAppAttemptId(), RMAppAttemptState.FAILED);
+ // Will automatically start the third AppAttempt
+ rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ MockAM am3 =
+ rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1);
+ MockRM.finishAMAndVerifyAppState(app1, rm2, nm1, am3);
+ RMAppAttempt attempt3 =
+ rm2.getRMContext().getRMApps().get(app1.getApplicationId())
+ .getCurrentAppAttempt();
+ Assert.assertFalse(attempt3.shouldNotCountFailureToAttemptLimit());
+ Assert.assertEquals(ContainerExitStatus.INVALID,
+ appState.getAttempt(am3.getApplicationAttemptId())
+ .getAMContainerExitStatus());
+
+ rm1.stop();
+ rm2.stop();
+ }
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
index 5fcb475..4ed0a0d 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
@@ -27,7 +27,6 @@
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
@@ -37,15 +36,11 @@
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.junit.Assert;
@@ -345,122 +340,4 @@ public void testNMTokensRebindOnAMRestart() throws Exception {
Assert.assertTrue(transferredTokens.containsAll(expectedNMTokens));
rm1.stop();
}
-
- // AM container preempted should not be counted towards AM max retry count.
- @Test(timeout = 20000)
- public void testAMPreemptedNotCountedForAMFailures() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
- conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
- ResourceScheduler.class);
- // explicitly set max-am-retry count as 1.
- conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
- MockRM rm1 = new MockRM(conf);
- rm1.start();
- MockNM nm1 =
- new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
- nm1.registerNode();
- RMApp app1 = rm1.submitApp(200);
- RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
- MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
- CapacityScheduler scheduler =
- (CapacityScheduler) rm1.getResourceScheduler();
- ContainerId amContainer =
- ContainerId.newInstance(am1.getApplicationAttemptId(), 1);
- // Preempt the first attempt;
- scheduler.killContainer(scheduler.getRMContainer(amContainer));
-
- am1.waitForState(RMAppAttemptState.FAILED);
- Assert.assertTrue(attempt1.isPreempted());
- rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
- // AM should be restarted even though max-am-attempt is 1.
- MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
- RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
- Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
-
- // Preempt the second attempt.
- ContainerId amContainer2 =
- ContainerId.newInstance(am2.getApplicationAttemptId(), 1);
- scheduler.killContainer(scheduler.getRMContainer(amContainer2));
-
- am2.waitForState(RMAppAttemptState.FAILED);
- Assert.assertTrue(attempt2.isPreempted());
- rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
- MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
- RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
- Assert.assertTrue(((RMAppAttemptImpl) attempt3).mayBeLastAttempt());
-
- // fail the AM normally
- nm1.nodeHeartbeat(am3.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
- am3.waitForState(RMAppAttemptState.FAILED);
- Assert.assertFalse(attempt3.isPreempted());
-
- // AM should not be restarted.
- rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED);
- Assert.assertEquals(3, app1.getAppAttempts().size());
- rm1.stop();
- }
-
- // Test RM restarts after AM container is preempted, new RM should not count
- // AM preemption failure towards the max-retry-account and should be able to
- // re-launch the AM.
- @Test(timeout = 20000)
- public void testPreemptedAMRestartOnRMRestart() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
- conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
- ResourceScheduler.class);
- conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
- conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
- // explicitly set max-am-retry count as 1.
- conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
- MemoryRMStateStore memStore = new MemoryRMStateStore();
- memStore.init(conf);
-
- MockRM rm1 = new MockRM(conf, memStore);
- rm1.start();
- MockNM nm1 =
- new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
- nm1.registerNode();
- RMApp app1 = rm1.submitApp(200);
- RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
- MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
- CapacityScheduler scheduler =
- (CapacityScheduler) rm1.getResourceScheduler();
- ContainerId amContainer =
- ContainerId.newInstance(am1.getApplicationAttemptId(), 1);
-
- // Forcibly preempt the am container;
- scheduler.killContainer(scheduler.getRMContainer(amContainer));
-
- am1.waitForState(RMAppAttemptState.FAILED);
- Assert.assertTrue(attempt1.isPreempted());
- rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
-
- // state store has 1 attempt stored.
- ApplicationState appState =
- memStore.getState().getApplicationState().get(app1.getApplicationId());
- Assert.assertEquals(1, appState.getAttemptCount());
- // attempt stored has the preempted container exit status.
- Assert.assertEquals(ContainerExitStatus.PREEMPTED,
- appState.getAttempt(am1.getApplicationAttemptId())
- .getAMContainerExitStatus());
- // Restart rm.
- MockRM rm2 = new MockRM(conf, memStore);
- nm1.setResourceTrackerService(rm2.getResourceTrackerService());
- nm1.registerNode();
- rm2.start();
-
- // Restarted RM should re-launch the am.
- MockAM am2 =
- rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
- MockRM.finishAMAndVerifyAppState(app1, rm2, nm1, am2);
- RMAppAttempt attempt2 =
- rm2.getRMContext().getRMApps().get(app1.getApplicationId())
- .getCurrentAppAttempt();
- Assert.assertFalse(attempt2.isPreempted());
- Assert.assertEquals(ContainerExitStatus.INVALID,
- appState.getAttempt(am2.getApplicationAttemptId())
- .getAMContainerExitStatus());
- rm1.stop();
- rm2.stop();
- }
}