diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/ControlledClock.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/ControlledClock.java
deleted file mode 100644
index 198117b..0000000
--- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/ControlledClock.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.hadoop.mapreduce.v2.app;
-
-import org.apache.hadoop.yarn.util.Clock;
-
-public class ControlledClock implements Clock {
- private long time = -1;
- private final Clock actualClock;
- public ControlledClock(Clock actualClock) {
- this.actualClock = actualClock;
- }
- public synchronized void setTime(long time) {
- this.time = time;
- }
- public synchronized void reset() {
- time = -1;
- }
-
- @Override
- public synchronized long getTime() {
- if (time != -1) {
- return time;
- }
- return actualClock.getTime();
- }
-
-}
diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
index 4aa11ed..1330344 100644
--- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
+++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
@@ -34,7 +34,6 @@
import java.util.Map;
import org.junit.Assert;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -59,7 +58,6 @@
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.ClusterInfo;
-import org.apache.hadoop.mapreduce.v2.app.ControlledClock;
import org.apache.hadoop.mapreduce.v2.app.MRApp;
import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
@@ -87,6 +85,7 @@
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java
index e554281..9664b8f 100644
--- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java
+++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java
@@ -42,10 +42,8 @@
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.ClusterInfo;
-import org.apache.hadoop.mapreduce.v2.app.ControlledClock;
import org.apache.hadoop.mapreduce.v2.app.MRApp;
import org.junit.Assert;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -110,6 +108,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.After;
import org.junit.Before;
diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java
index 1e07062..e2e943a 100644
--- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java
+++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java
@@ -22,6 +22,7 @@
import java.io.File;
import java.io.FileOutputStream;
import java.util.UUID;
+
import org.junit.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -30,12 +31,12 @@
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.mapreduce.v2.app.ControlledClock;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.apache.hadoop.test.CoreTestDriver;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.After;
import org.junit.AfterClass;
diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java
index 601904b..d2edd19 100644
--- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java
+++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecutionWithMRApp.java
@@ -24,14 +24,12 @@
import java.util.Random;
import org.junit.Assert;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskState;
-import org.apache.hadoop.mapreduce.v2.app.ControlledClock;
import org.apache.hadoop.mapreduce.v2.app.MRApp;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.job.Task;
@@ -44,6 +42,7 @@
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.Test;
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java
index 1ee04f0..b80c3c9 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java
@@ -45,6 +45,15 @@
* {@link ContainerLaunchContext} of the container in which the
* ApplicationMaster is executed.
*
+ *
maxAppAttempts. The maximum number of application attempts.
+ * It should be no larger than the global number of max attempts in the
+ * Yarn configuration.
+ * attemptFailuresValidityInterval. The default value is -1.
+ * when attemptFailuresValidityInterval in millisecond is set to > 0,
+ * the failure number will no take failures which happen out of the
+ * validityInterval into failure count. If failure count reaches to
+ * maxAppAttempts, the application will be failed.
+ *
*
*
*
@@ -103,6 +112,22 @@ public static ApplicationSubmissionContext newInstance(
resource, null);
}
+ @Public
+ @Stable
+ public static ApplicationSubmissionContext newInstance(
+ ApplicationId applicationId, String applicationName, String queue,
+ Priority priority, ContainerLaunchContext amContainer,
+ boolean isUnmanagedAM, boolean cancelTokensWhenComplete,
+ int maxAppAttempts, Resource resource, String applicationType,
+ boolean keepContainers, long attemptFailuresValidityInterval) {
+ ApplicationSubmissionContext context =
+ newInstance(applicationId, applicationName, queue, priority,
+ amContainer, isUnmanagedAM, cancelTokensWhenComplete, maxAppAttempts,
+ resource, applicationType, keepContainers);
+ context.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
+ return context;
+ }
+
/**
* Get the ApplicationId of the submitted application.
* @return ApplicationId of the submitted application
@@ -338,4 +363,22 @@ public abstract void setKeepContainersAcrossApplicationAttempts(
@Public
@Stable
public abstract void setApplicationTags(Set tags);
+
+ /**
+ * Get the attemptFailuresValidityInterval in millisecond for the application
+ *
+ * @return the attemptFailuresValidityInterval
+ */
+ @Public
+ @Stable
+ public abstract long getAttemptFailuresValidityInterval();
+
+ /**
+ * Set the attemptFailuresValidityInterval in millisecond for the application
+ * @param attemptFailuresValidityInterval
+ */
+ @Public
+ @Stable
+ public abstract void setAttemptFailuresValidityInterval(
+ long attemptFailuresValidityInterval);
}
\ No newline at end of file
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
index 3f1fa6c..81e11ba 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto
@@ -288,6 +288,7 @@ message ApplicationSubmissionContextProto {
optional string applicationType = 10 [default = "YARN"];
optional bool keep_containers_across_application_attempts = 11 [default = false];
repeated string applicationTags = 12;
+ optional int64 attempt_failures_validity_interval = 13 [default = -1];
}
enum ApplicationAccessTypeProto {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java
index c2f3268..7b49a16 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java
@@ -402,4 +402,17 @@ private ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
private ResourceProto convertToProtoFormat(Resource t) {
return ((ResourcePBImpl)t).getProto();
}
+
+ @Override
+ public long getAttemptFailuresValidityInterval() {
+ ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder;
+ return p.getAttemptFailuresValidityInterval();
+ }
+
+ @Override
+ public void setAttemptFailuresValidityInterval(
+ long attemptFailuresValidityInterval) {
+ maybeInitBuilder();
+ builder.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ControlledClock.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ControlledClock.java
new file mode 100644
index 0000000..16bd785
--- /dev/null
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ControlledClock.java
@@ -0,0 +1,42 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hadoop.yarn.util;
+
+
+public class ControlledClock implements Clock {
+ private long time = -1;
+ private final Clock actualClock;
+ public ControlledClock(Clock actualClock) {
+ this.actualClock = actualClock;
+ }
+ public synchronized void setTime(long time) {
+ this.time = time;
+ }
+ public synchronized void reset() {
+ time = -1;
+ }
+
+ @Override
+ public synchronized long getTime() {
+ if (time != -1) {
+ return time;
+ }
+ return actualClock.getTime();
+ }
+
+}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
index 0a3b269..dca762c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java
@@ -280,7 +280,8 @@ private void loadRMAppState(RMState rmState) throws Exception {
attemptStateData.getFinalTrackingUrl(),
attemptStateData.getDiagnostics(),
attemptStateData.getFinalApplicationStatus(),
- attemptStateData.getAMContainerExitStatus());
+ attemptStateData.getAMContainerExitStatus(),
+ attemptStateData.getFinishTime());
// assert child node name is same as application attempt id
assert attemptId.equals(attemptState.getAttemptId());
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java
index f56517c..dc43821 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java
@@ -167,7 +167,8 @@ public synchronized void updateApplicationAttemptStateInternal(
attemptStateData.getFinalTrackingUrl(),
attemptStateData.getDiagnostics(),
attemptStateData.getFinalApplicationStatus(),
- attemptStateData.getAMContainerExitStatus());
+ attemptStateData.getAMContainerExitStatus(),
+ attemptStateData.getFinishTime());
ApplicationState appState =
state.getApplicationState().get(
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index 714a108..3602c35 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -259,6 +259,7 @@ public RMStateStore() {
final Container masterContainer;
final Credentials appAttemptCredentials;
long startTime = 0;
+ long finishTime = 0;
// fields set when attempt completes
RMAppAttemptState state;
String finalTrackingUrl = "N/A";
@@ -270,14 +271,14 @@ public ApplicationAttemptState(ApplicationAttemptId attemptId,
Container masterContainer, Credentials appAttemptCredentials,
long startTime) {
this(attemptId, masterContainer, appAttemptCredentials, startTime, null,
- null, "", null, ContainerExitStatus.INVALID);
+ null, "", null, ContainerExitStatus.INVALID, 0);
}
public ApplicationAttemptState(ApplicationAttemptId attemptId,
Container masterContainer, Credentials appAttemptCredentials,
long startTime, RMAppAttemptState state, String finalTrackingUrl,
String diagnostics, FinalApplicationStatus amUnregisteredFinalStatus,
- int exitStatus) {
+ int exitStatus, long finishTime) {
this.attemptId = attemptId;
this.masterContainer = masterContainer;
this.appAttemptCredentials = appAttemptCredentials;
@@ -287,6 +288,7 @@ public ApplicationAttemptState(ApplicationAttemptId attemptId,
this.diagnostics = diagnostics == null ? "" : diagnostics;
this.amUnregisteredFinalStatus = amUnregisteredFinalStatus;
this.exitStatus = exitStatus;
+ this.finishTime = finishTime;
}
public Container getMasterContainer() {
@@ -316,6 +318,9 @@ public FinalApplicationStatus getFinalApplicationStatus() {
public int getAMContainerExitStatus(){
return this.exitStatus;
}
+ public long getFinishTime() {
+ return this.finishTime;
+ }
}
/**
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
index 1b1ec76..d0e9e1a 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
@@ -603,7 +603,8 @@ private void loadApplicationAttemptState(ApplicationState appState,
attemptStateData.getFinalTrackingUrl(),
attemptStateData.getDiagnostics(),
attemptStateData.getFinalApplicationStatus(),
- attemptStateData.getAMContainerExitStatus());
+ attemptStateData.getAMContainerExitStatus(),
+ attemptStateData.getFinishTime());
appState.attempts.put(attemptState.getAttemptId(), attemptState);
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java
index 5cb9787..923600f 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java
@@ -43,7 +43,8 @@ public static ApplicationAttemptStateData newInstance(
ApplicationAttemptId attemptId, Container container,
ByteBuffer attemptTokens, long startTime, RMAppAttemptState finalState,
String finalTrackingUrl, String diagnostics,
- FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus) {
+ FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus,
+ long finishTime) {
ApplicationAttemptStateData attemptStateData =
Records.newRecord(ApplicationAttemptStateData.class);
attemptStateData.setAttemptId(attemptId);
@@ -55,6 +56,7 @@ public static ApplicationAttemptStateData newInstance(
attemptStateData.setStartTime(startTime);
attemptStateData.setFinalApplicationStatus(amUnregisteredFinalStatus);
attemptStateData.setAMContainerExitStatus(exitStatus);
+ attemptStateData.setFinishTime(finishTime);
return attemptStateData;
}
@@ -72,7 +74,7 @@ public static ApplicationAttemptStateData newInstance(
attemptState.getStartTime(), attemptState.getState(),
attemptState.getFinalTrackingUrl(), attemptState.getDiagnostics(),
attemptState.getFinalApplicationStatus(),
- attemptState.getAMContainerExitStatus());
+ attemptState.getAMContainerExitStatus(), attemptState.getFinishTime());
}
public abstract ApplicationAttemptStateDataProto getProto();
@@ -157,4 +159,12 @@ public abstract void setFinalApplicationStatus(
public abstract int getAMContainerExitStatus();
public abstract void setAMContainerExitStatus(int exitStatus);
+
+ /**
+ * Get the finish time of the application attempt.
+ * @return finish time of the application attempt
+ */
+ public abstract long getFinishTime();
+
+ public abstract void setFinishTime(long finishTime);
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java
index 5c62d63..809dbf3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java
@@ -294,4 +294,16 @@ private FinalApplicationStatusProto convertToProtoFormat(FinalApplicationStatus
private FinalApplicationStatus convertFromProtoFormat(FinalApplicationStatusProto s) {
return ProtoUtils.convertFromProtoFormat(s);
}
+
+ @Override
+ public long getFinishTime() {
+ ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder;
+ return p.getFinishTime();
+ }
+
+ @Override
+ public void setFinishTime(long finishTime) {
+ maybeInitBuilder();
+ builder.setFinishTime(finishTime);
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 0b81f96..c71cc9e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -32,10 +32,10 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
@@ -80,8 +80,12 @@
import org.apache.hadoop.yarn.state.SingleArcTransition;
import org.apache.hadoop.yarn.state.StateMachine;
import org.apache.hadoop.yarn.state.StateMachineFactory;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.hadoop.yarn.util.resource.Resources;
+import com.google.common.annotations.VisibleForTesting;
+
@SuppressWarnings({ "rawtypes", "unchecked" })
public class RMAppImpl implements RMApp, Recoverable {
@@ -109,6 +113,12 @@
private final String applicationType;
private final Set applicationTags;
+ private final long attemptFailuresValidityInterval;
+
+ private Clock systemClock;
+
+ private boolean isNumAttemptsBeyondThreshold = false;
+
// Mutable fields
private long startTime;
private long finishTime = 0;
@@ -327,6 +337,8 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext,
ApplicationMasterService masterService, long submitTime,
String applicationType, Set applicationTags) {
+ this.systemClock = new SystemClock();
+
this.applicationId = applicationId;
this.name = name;
this.rmContext = rmContext;
@@ -339,7 +351,7 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext,
this.scheduler = scheduler;
this.masterService = masterService;
this.submitTime = submitTime;
- this.startTime = System.currentTimeMillis();
+ this.startTime = this.systemClock.getTime();
this.applicationType = applicationType;
this.applicationTags = applicationTags;
@@ -357,6 +369,9 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext,
this.maxAppAttempts = individualMaxAppAttempts;
}
+ this.attemptFailuresValidityInterval =
+ submissionContext.getAttemptFailuresValidityInterval();
+
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
this.readLock = lock.readLock();
this.writeLock = lock.writeLock();
@@ -877,7 +892,7 @@ private String getAppAttemptFailedDiagnostics(RMAppEvent event) {
msg = "Unmanaged application " + this.getApplicationId()
+ " failed due to " + failedEvent.getDiagnostics()
+ ". Failing the application.";
- } else if (getNumFailedAppAttempts() >= this.maxAppAttempts) {
+ } else if (this.isNumAttemptsBeyondThreshold) {
msg = "Application " + this.getApplicationId() + " failed "
+ this.maxAppAttempts + " times due to "
+ failedEvent.getDiagnostics() + ". Failing the application.";
@@ -910,7 +925,7 @@ private void rememberTargetTransitionsAndStoreState(RMAppEvent event,
RMAppState stateToBeStored) {
rememberTargetTransitions(event, transitionToDo, targetFinalState);
this.stateBeforeFinalSaving = getState();
- this.storedFinishTime = System.currentTimeMillis();
+ this.storedFinishTime = this.systemClock.getTime();
LOG.info("Updating application " + this.applicationId
+ " with final state: " + this.targetedFinalState);
@@ -1077,7 +1092,7 @@ public void transition(RMAppImpl app, RMAppEvent event) {
}
app.finishTime = app.storedFinishTime;
if (app.finishTime == 0 ) {
- app.finishTime = System.currentTimeMillis();
+ app.finishTime = app.systemClock.getTime();
}
// Recovered apps that are completed were not added to scheduler, so no
// need to remove them from scheduler.
@@ -1096,11 +1111,16 @@ public void transition(RMAppImpl app, RMAppEvent event) {
private int getNumFailedAppAttempts() {
int completedAttempts = 0;
+ long endTime = this.systemClock.getTime();
// Do not count AM preemption, hardware failures or NM resync
// as attempt failure.
for (RMAppAttempt attempt : attempts.values()) {
if (attempt.shouldCountTowardsMaxAttemptRetry()) {
- completedAttempts++;
+ if (this.attemptFailuresValidityInterval <= 0
+ || (attempt.getFinishTime() > endTime
+ - this.attemptFailuresValidityInterval)) {
+ completedAttempts++;
+ }
}
}
return completedAttempts;
@@ -1118,8 +1138,10 @@ public AttemptFailedTransition(RMAppState initialState) {
@Override
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
+ int numberOfFailure = app.getNumFailedAppAttempts();
+
if (!app.submissionContext.getUnmanagedAM()
- && app.getNumFailedAppAttempts() < app.maxAppAttempts) {
+ && numberOfFailure < app.maxAppAttempts) {
boolean transferStateFromPreviousAttempt = false;
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
transferStateFromPreviousAttempt =
@@ -1137,6 +1159,9 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) {
}
return initialState;
} else {
+ if (numberOfFailure >= app.maxAppAttempts) {
+ app.isNumAttemptsBeyondThreshold = true;
+ }
app.rememberTargetTransitionsAndStoreState(event,
new AttemptFailedFinalStateSavedTransition(), RMAppState.FAILED,
RMAppState.FAILED);
@@ -1214,4 +1239,10 @@ public RMAppMetrics getRMAppMetrics() {
return new RMAppMetrics(resourcePreempted,
numNonAMContainerPreempted, numAMContainerPreempted);
}
+
+ @Private
+ @VisibleForTesting
+ public void setSystemClock(Clock clock) {
+ this.systemClock = clock;
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
index b5ed92c..943a5e5 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
@@ -213,4 +213,10 @@
* @return metrics
*/
RMAppAttemptMetrics getRMAppAttemptMetrics();
+
+ /**
+ * the finish time of the application attempt.
+ * @return the finish time of the application attempt.
+ */
+ long getFinishTime();
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 19fc800..7efc931 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -42,7 +42,6 @@
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -141,6 +140,7 @@
private String originalTrackingUrl = "N/A";
private String proxiedTrackingUrl = "N/A";
private long startTime = 0;
+ private long finishTime = 0;
// Set to null initially. Will eventually get set
// if an RMAppAttemptUnregistrationEvent occurs
@@ -733,6 +733,7 @@ public void recover(RMState state) throws Exception {
this.proxiedTrackingUrl = generateProxyUriWithScheme(originalTrackingUrl);
this.finalStatus = attemptState.getFinalApplicationStatus();
this.startTime = attemptState.getStartTime();
+ this.finishTime = attemptState.getFinishTime();
}
public void transferStateFromPreviousAttempt(RMAppAttempt attempt) {
@@ -1019,10 +1020,12 @@ private void rememberTargetTransitionsAndStoreState(RMAppAttemptEvent event,
}
RMStateStore rmStore = rmContext.getStateStore();
+ setFinishTime(System.currentTimeMillis());
ApplicationAttemptState attemptState =
new ApplicationAttemptState(applicationAttemptId, getMasterContainer(),
rmStore.getCredentialsFromAppAttempt(this), startTime,
- stateToBeStored, finalTrackingUrl, diags, finalStatus, exitStatus);
+ stateToBeStored, finalTrackingUrl, diags, finalStatus, exitStatus,
+ getFinishTime());
LOG.info("Updating application attempt " + applicationAttemptId
+ " with final state: " + targetedFinalState + ", and exit status: "
+ exitStatus);
@@ -1732,4 +1735,23 @@ public RMAppAttemptMetrics getRMAppAttemptMetrics() {
// lock
return attemptMetrics;
}
+
+ @Override
+ public long getFinishTime() {
+ try {
+ this.readLock.lock();
+ return this.finishTime;
+ } finally {
+ this.readLock.unlock();
+ }
+ }
+
+ private void setFinishTime(long finishTime) {
+ try {
+ this.writeLock.lock();
+ this.finishTime = finishTime;
+ } finally {
+ this.writeLock.unlock();
+ }
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto
index eab6af1..8baf4dc 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto
@@ -78,6 +78,7 @@ message ApplicationAttemptStateDataProto {
optional int64 start_time = 7;
optional FinalApplicationStatusProto final_application_status = 8;
optional int32 am_container_exit_status = 9 [default = -1000];
+ optional int64 finish_time = 10;
}
message EpochProto {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
index 3817637..1338a6c 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java
@@ -278,7 +278,16 @@ public RMApp submitApp(int masterMemory, String name, String user,
boolean waitForAccepted, boolean keepContainers) throws Exception {
return submitApp(masterMemory, name, user, acls, unmanaged, queue,
maxAppAttempts, ts, appType, waitForAccepted, keepContainers,
- false, null);
+ false, null, 0);
+ }
+
+ public RMApp submitApp(int masterMemory, long attemptFailuresValidityInterval)
+ throws Exception {
+ return submitApp(masterMemory, "", UserGroupInformation.getCurrentUser()
+ .getShortUserName(), null, false, null,
+ super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+ YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null, null, true, false,
+ false, null, attemptFailuresValidityInterval);
}
public RMApp submitApp(int masterMemory, String name, String user,
@@ -286,6 +295,17 @@ public RMApp submitApp(int masterMemory, String name, String user,
int maxAppAttempts, Credentials ts, String appType,
boolean waitForAccepted, boolean keepContainers, boolean isAppIdProvided,
ApplicationId applicationId) throws Exception {
+ return submitApp(masterMemory, name, user, acls, unmanaged, queue,
+ maxAppAttempts, ts, appType, waitForAccepted, keepContainers,
+ isAppIdProvided, applicationId, 0);
+ }
+
+ public RMApp submitApp(int masterMemory, String name, String user,
+ Map acls, boolean unmanaged, String queue,
+ int maxAppAttempts, Credentials ts, String appType,
+ boolean waitForAccepted, boolean keepContainers, boolean isAppIdProvided,
+ ApplicationId applicationId, long attemptFailuresValidityInterval)
+ throws Exception {
ApplicationId appId = isAppIdProvided ? applicationId : null;
ApplicationClientProtocol client = getClientRMService();
if (! isAppIdProvided) {
@@ -321,6 +341,7 @@ public RMApp submitApp(int masterMemory, String name, String user,
clc.setTokens(securityTokens);
}
sub.setAMContainerSpec(clc);
+ sub.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
req.setApplicationSubmissionContext(sub);
UserGroupInformation fakeUser =
UserGroupInformation.createUserForTesting(user, new String[] {"someGroup"});
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
index 6c5c818..e95cee7 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
@@ -44,6 +44,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
@@ -53,7 +54,10 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.ControlledClock;
import org.apache.hadoop.yarn.util.Records;
+import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.Assert;
import org.junit.Test;
@@ -584,4 +588,128 @@ public void testRMRestartOrFailoverNotCountedForAMFailures()
rm1.stop();
rm2.stop();
}
+
+ @Test (timeout = 50000)
+ public void testRMAppAttemptFailuresValidityInterval() throws Exception {
+ YarnConfiguration conf = new YarnConfiguration();
+ conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+ ResourceScheduler.class);
+ conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
+ conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
+ // explicitly set max-am-retry count as 2.
+ conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+ MemoryRMStateStore memStore = new MemoryRMStateStore();
+ memStore.init(conf);
+
+ MockRM rm1 = new MockRM(conf, memStore);
+ rm1.start();
+ MockNM nm1 =
+ new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
+ nm1.registerNode();
+
+ // set window size to a larger number : 20s
+ // we will verify the app should be failed if
+ // two continuous attempts failed in 20s.
+ RMApp app = rm1.submitApp(200, 20000);
+
+ MockAM am = MockRM.launchAM(app, rm1, nm1);
+ // Fail current attempt normally
+ nm1.nodeHeartbeat(am.getApplicationAttemptId(),
+ 1, ContainerState.COMPLETE);
+ am.waitForState(RMAppAttemptState.FAILED);
+ // launch the second attempt
+ rm1.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
+ Assert.assertEquals(2, app.getAppAttempts().size());
+ Assert.assertTrue(((RMAppAttemptImpl) app.getCurrentAppAttempt())
+ .mayBeLastAttempt());
+ MockAM am_2 = MockRM.launchAndRegisterAM(app, rm1, nm1);
+ am_2.waitForState(RMAppAttemptState.RUNNING);
+ nm1.nodeHeartbeat(am_2.getApplicationAttemptId(),
+ 1, ContainerState.COMPLETE);
+ am_2.waitForState(RMAppAttemptState.FAILED);
+ // current app should be failed.
+ rm1.waitForState(app.getApplicationId(), RMAppState.FAILED);
+
+ ControlledClock clock = new ControlledClock(new SystemClock());
+ // set window size to 6s
+ RMAppImpl app1 = (RMAppImpl)rm1.submitApp(200, 6000);;
+ app1.setSystemClock(clock);
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+ // Fail attempt1 normally
+ nm1.nodeHeartbeat(am1.getApplicationAttemptId(),
+ 1, ContainerState.COMPLETE);
+ am1.waitForState(RMAppAttemptState.FAILED);
+
+ // launch the second attempt
+ rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ Assert.assertEquals(2, app1.getAppAttempts().size());
+
+ RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
+ Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
+ MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ am2.waitForState(RMAppAttemptState.RUNNING);
+
+ // wait for 6 seconds
+ clock.setTime(System.currentTimeMillis() + 6*1000);
+ // Fail attempt2 normally
+ nm1.nodeHeartbeat(am2.getApplicationAttemptId(),
+ 1, ContainerState.COMPLETE);
+ am2.waitForState(RMAppAttemptState.FAILED);
+
+ // can launch the third attempt successfully
+ rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+ Assert.assertEquals(3, app1.getAppAttempts().size());
+ RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
+ clock.reset();
+ MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+ am3.waitForState(RMAppAttemptState.RUNNING);
+
+ // Restart rm.
+ @SuppressWarnings("resource")
+ MockRM rm2 = new MockRM(conf, memStore);
+ rm2.start();
+
+ // re-register the NM
+ nm1.setResourceTrackerService(rm2.getResourceTrackerService());
+ NMContainerStatus status = Records.newRecord(NMContainerStatus.class);
+ status
+ .setContainerExitStatus(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
+ status.setContainerId(attempt3.getMasterContainer().getId());
+ status.setContainerState(ContainerState.COMPLETE);
+ status.setDiagnostics("");
+ nm1.registerNode(Collections.singletonList(status), null);
+
+ rm2.waitForState(attempt3.getAppAttemptId(), RMAppAttemptState.FAILED);
+
+ rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+
+ // Lauch Attempt 4
+ MockAM am4 =
+ rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
+
+ // wait for 6 seconds
+ clock.setTime(System.currentTimeMillis() + 6*1000);
+ // Fail attempt4 normally
+ nm1
+ .nodeHeartbeat(am4.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ am4.waitForState(RMAppAttemptState.FAILED);
+
+ // can launch the 5th attempt successfully
+ rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
+
+ MockAM am5 =
+ rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 5, nm1);
+ clock.reset();
+ am5.waitForState(RMAppAttemptState.RUNNING);
+
+ // Fail attempt5 normally
+ nm1
+ .nodeHeartbeat(am5.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+ am5.waitForState(RMAppAttemptState.FAILED);
+
+ rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
+ rm1.stop();
+ rm2.stop();
+ }
}
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
index 620ba9f..e408870 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
@@ -304,7 +304,8 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper)
oldAttemptState.getAppAttemptCredentials(),
oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED,
"myTrackingUrl", "attemptDiagnostics",
- FinalApplicationStatus.SUCCEEDED, 100);
+ FinalApplicationStatus.SUCCEEDED, 100,
+ oldAttemptState.getFinishTime());
store.updateApplicationAttemptState(newAttemptState);
// test updating the state of an app/attempt whose initial state was not
@@ -327,7 +328,8 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper)
oldAttemptState.getAppAttemptCredentials(),
oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED,
"myTrackingUrl", "attemptDiagnostics",
- FinalApplicationStatus.SUCCEEDED, 111);
+ FinalApplicationStatus.SUCCEEDED, 111,
+ oldAttemptState.getFinishTime());
store.updateApplicationAttemptState(dummyAttempt);
// let things settle down