diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 0b150c2..412b659 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -893,6 +893,11 @@ public static boolean isAclEnabled(Configuration conf) { NM_PREFIX + "container-diagnostics-maximum-size"; public static final int DEFAULT_NM_CONTAINER_DIAGNOSTICS_MAXIMUM_SIZE = 10000; + /** Minimum container restart interval, in seconds. */ + public static final String NM_CONTAINER_RETRY_MINIMUM_INTERVAL_SECS = + NM_PREFIX + "container-retry-minimum-interval-secs"; + public static final int DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_SECS = 1; + /** Interval at which the delayed token removal thread runs */ public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS = RM_PREFIX + "delayed.delegation-token.removal-interval-ms"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index a38d0d8..238394c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1589,6 +1589,12 @@ + Minimum container restart interval in seconds. 
+ yarn.nodemanager.container-retry-minimum-interval-secs + 1 + + + Max number of threads in NMClientAsync to process container management events yarn.client.nodemanager-client-async.thread-pool-max-size diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index b1ddc2e..b16cf1e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -155,6 +155,19 @@ public ContainerImpl(Configuration conf, Dispatcher dispatcher, this.containerRetryContext = ContainerRetryContext.NEVER_RETRY_CONTEXT; } this.remainingRetryAttempts = containerRetryContext.getMaxRetries(); + /* Clamp the configured seconds before scaling to ms so the int cannot wrap. */ + int minimumRestartInterval = 1000 * Math.max(0, Math.min( + Integer.MAX_VALUE / 1000, conf.getInt( + YarnConfiguration.NM_CONTAINER_RETRY_MINIMUM_INTERVAL_SECS, + YarnConfiguration.DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_SECS))); + /* Raise a shorter requested retry interval to the configured minimum. */ + if (containerRetryContext.getRetryPolicy() + != ContainerRetryPolicy.NEVER_RETRY + && containerRetryContext.getRetryInterval() < minimumRestartInterval) { + LOG.info("Set restart interval to minimum value " + minimumRestartInterval + + " for container " + containerTokenIdentifier.getContainerID()); + this.containerRetryContext.setRetryInterval(minimumRestartInterval); + } this.diagnosticsMaxSize = conf.getInt( YarnConfiguration.NM_CONTAINER_DIAGNOSTICS_MAXIMUM_SIZE, YarnConfiguration.DEFAULT_NM_CONTAINER_DIAGNOSTICS_MAXIMUM_SIZE);