diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index 5a36bd1..2020193 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -313,7 +313,15 @@ private static void addDeprecatedKeys() {
public static final String RM_AM_MAX_ATTEMPTS =
RM_PREFIX + "am.max-attempts";
public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
-
+
+ /**
+ * Global attempt failures validity interval (ms); a floor for every application.
+ */
+ public static final String RM_APP_ATTEMPT_FAILURES_VALIDITY_INTERVAL =
+ RM_PREFIX + "app-attempt.failure-interval";
+ public static final long DEFAULT_RM_APP_ATTEMPT_FAILURES_VALIDITY_INTERVAL =
+ 10*60*1000; // 600000 ms, i.e. 10 minutes
+
/** The keytab for the resource manager.*/
public static final String RM_KEYTAB =
RM_PREFIX + "keytab";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 9bbdb94..60eded5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -234,6 +234,20 @@
+ When attemptFailuresValidityInterval (in milliseconds) is
+ set to > 0, failures which happen outside of the validityInterval
+ are not taken into the failure count. If the failure count
+ reaches maxAppAttempts, the application will be failed. It's a global
+ setting for all applications. Each application master can specify
+ its individual attemptFailuresValidityInterval via the API, but the
+ individual number cannot be smaller than this global value. If it is,
+ the resourcemanager will override it. The default number is set to 600000.
+
+ yarn.resourcemanager.app-attempt.failure-interval
+ 600000
+
+
+
How often to check that containers are still alive.
yarn.resourcemanager.container.liveness-monitor.interval-ms
600000
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 9220849..ad6069b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -428,8 +428,24 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext,
this.maxAppAttempts = individualMaxAppAttempts;
}
- this.attemptFailuresValidityInterval =
+ // The global interval is a floor: a smaller per-application value is overridden.
+ long globalAttemptFailuresValidityInterval = conf.getLong(
+     YarnConfiguration.RM_APP_ATTEMPT_FAILURES_VALIDITY_INTERVAL,
+     YarnConfiguration.DEFAULT_RM_APP_ATTEMPT_FAILURES_VALIDITY_INTERVAL);
+ long individualAttemptFailuresValidityInterval =
      submissionContext.getAttemptFailuresValidityInterval();
+ if (individualAttemptFailuresValidityInterval < globalAttemptFailuresValidityInterval) {
+   this.attemptFailuresValidityInterval =
+       globalAttemptFailuresValidityInterval;
+   LOG.warn("The specific AttemptFailuresValidityInterval: "
+       + individualAttemptFailuresValidityInterval + " for application: "
+       + applicationId.getId() + " is invalid, because it is smaller than "
+       + globalAttemptFailuresValidityInterval
+       + ". Use the global AttemptFailuresValidityInterval instead.");
+ } else {
+   this.attemptFailuresValidityInterval =
+       individualAttemptFailuresValidityInterval;
+ }
if (this.attemptFailuresValidityInterval > 0) {
LOG.info("The attemptFailuresValidityInterval for the application: "
+ this.applicationId + " is " + this.attemptFailuresValidityInterval