.../apache/hadoop/yarn/conf/YarnConfiguration.java | 29 +++++++++++++++++++-
.../src/main/resources/yarn-default.xml | 16 +++++++++++
.../resourcemanager/RMActiveServiceContext.java | 15 ++++++++++
.../yarn/server/resourcemanager/RMContext.java | 3 ++
.../yarn/server/resourcemanager/RMContextImpl.java | 13 +++++++++
.../server/resourcemanager/ResourceManager.java | 10 +++++++
.../server/resourcemanager/rmapp/RMAppImpl.java | 3 ++
.../rmapp/attempt/RMAppAttemptImpl.java | 3 ++
.../server/resourcemanager/rmnode/RMNodeImpl.java | 32 ++++++++++++++++++++++
.../rmapp/attempt/TestRMAppAttemptTransitions.java | 8 ++++--
10 files changed, 129 insertions(+), 3 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index a8a87ad8c9c..24eb89c5e66 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -558,7 +558,19 @@ public static boolean isAclEnabled(Configuration conf) {
public static final String RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS =
RM_PREFIX + "rm.container-allocation.expiry-interval-ms";
public static final int DEFAULT_RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS = 600000;
-
+
+ /** How long to wait for AM container allocation default 15 minutes */
+ public static final String AM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS = RM_PREFIX
+ + "am.container-allocation.expiry-interval-ms";
+ public static final int DEFAULT_AM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS = 900000;
+
+ /**
+ * AM Container Allocation Expire Enabled or not. By default enabled = false.
+ */
+ public static final String AMCONTAINER_ALLOCATIONEXPIRY_ENABLED = RM_PREFIX
+ + "am.container-allocation.expiry.enabled";
+ public static final boolean DEFAULT_AMCONTAINER_ALLOCATIONEXPIRY_ENABLED = false;
+
/** Path to file with nodes to include.*/
public static final String RM_NODES_INCLUDE_FILE_PATH =
RM_PREFIX + "nodes.include-path";
@@ -4786,4 +4798,19 @@ public static long getSkipNodeInterval(Configuration conf) {
public static void main(String[] args) throws Exception {
new YarnConfiguration(new Configuration()).writeXml(System.out);
}
+
+ /**
+ * Returns true if am allocation expiry is enabled else false
+ * @param conf
+ * @return boolean
+ */
+ public static boolean isAMContainerAllocationExpiryEnabled(
+ Configuration conf) {
+ if (null == conf) {
+ return false;
+ }
+ return conf.getBoolean(
+ YarnConfiguration.AMCONTAINER_ALLOCATIONEXPIRY_ENABLED,
+ YarnConfiguration.DEFAULT_AMCONTAINER_ALLOCATIONEXPIRY_ENABLED);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index f7d9fc1d2b0..fb502d45c83 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1138,6 +1138,22 @@
yarn.resourcemanager.rm.container-allocation.expiry-interval-ms
600000
+
+
+
+ AM container is not allocated for specified time then kill the application
+
+ yarn.resourcemanager.am.container-allocation.expiry.enabled
+ false
+
+
+
+
+ The expiry interval for AM container
+
+ yarn.resourcemanager.am.container-allocation.expiry-interval-ms
+ 900000
+
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java
index f1b0c794031..c19528b58ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMActiveServiceContext.java
@@ -30,6 +30,7 @@
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirer;
import org.apache.hadoop.yarn.nodelabels.NodeAttributesManager;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.SystemCredentialsForAppsProto;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMDelegatedNodeLabelsUpdater;
@@ -91,6 +92,7 @@
private AMLivelinessMonitor amFinishingMonitor;
private RMStateStore stateStore = null;
private ContainerAllocationExpirer containerAllocationExpirer;
+ private AMContainerAllocationExpirer amContainerAllocationExpirer;
private DelegationTokenRenewer delegationTokenRenewer;
private AMRMTokenSecretManager amRMTokenSecretManager;
private RMContainerTokenSecretManager containerTokenSecretManager;
@@ -604,4 +606,17 @@ public Long getTokenSequenceNo() {
public void incrTokenSequenceNo() {
this.tokenSequenceNo.incrementAndGet();
}
+
+ @Private
+ @Unstable
+ public AMContainerAllocationExpirer getAMContainerAllocationExpirer() {
+ return this.amContainerAllocationExpirer;
+ }
+
+ @Private
+ @Unstable
+ void setAMContainerAllocationExpirer(
+ AMContainerAllocationExpirer amContainerAllocationExpirer) {
+ this.amContainerAllocationExpirer = amContainerAllocationExpirer;
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
index 55420bd9270..c7e0aa31f69 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java
@@ -41,6 +41,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetimeMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
@@ -203,4 +204,6 @@ void setMultiNodeSortingManager(
long getTokenSequenceNo();
void incrTokenSequenceNo();
+
+ AMContainerAllocationExpirer getAMContainerAllocationExpirer();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
index 7f10138494e..7675f5a51d4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java
@@ -47,6 +47,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetimeMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
@@ -678,4 +679,16 @@ public long getTokenSequenceNo() {
public void incrTokenSequenceNo() {
this.activeServiceContext.incrTokenSequenceNo();
}
+
+ @VisibleForTesting
+ public void setAMContainerAllocationExpirer(
+ AMContainerAllocationExpirer amContainerAllocationExpirer) {
+ activeServiceContext
+ .setAMContainerAllocationExpirer(amContainerAllocationExpirer);
+ }
+
+ @Override
+ public AMContainerAllocationExpirer getAMContainerAllocationExpirer() {
+ return activeServiceContext.getAMContainerAllocationExpirer();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
index c315b335415..3f34b713082 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
@@ -95,6 +95,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetimeMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
@@ -655,6 +656,7 @@ protected static void validateConfigs(Configuration conf) {
private EventHandler schedulerDispatcher;
private ApplicationMasterLauncher applicationMasterLauncher;
private ContainerAllocationExpirer containerAllocationExpirer;
+ private AMContainerAllocationExpirer amContainerAllocationExpirer;
private ResourceManager rm;
private boolean fromActive = false;
private StandByTransitionRunnable standByTransitionRunnable;
@@ -676,6 +678,14 @@ protected void serviceInit(Configuration configuration) throws Exception {
addService(containerAllocationExpirer);
rmContext.setContainerAllocationExpirer(containerAllocationExpirer);
+ if (YarnConfiguration.isAMContainerAllocationExpiryEnabled(conf)) {
+ amContainerAllocationExpirer = new AMContainerAllocationExpirer(
+ rmContext);
+ addService(amContainerAllocationExpirer);
+ ((RMContextImpl) rmContext)
+ .setAMContainerAllocationExpirer(amContainerAllocationExpirer);
+ }
+
AMLivelinessMonitor amLivelinessMonitor = createAMLivelinessMonitor();
addService(amLivelinessMonitor);
rmContext.setAMLivelinessMonitor(amLivelinessMonitor);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 94f7bb97bfe..ff97bf72afc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -77,6 +77,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirerUtil;
import org.apache.hadoop.yarn.server.resourcemanager.blacklist.BlacklistManager;
import org.apache.hadoop.yarn.server.resourcemanager.blacklist.DisabledBlacklistManager;
import org.apache.hadoop.yarn.server.resourcemanager.blacklist.SimpleBlacklistManager;
@@ -1339,6 +1340,8 @@ public FinalSavingTransition(Object transitionToDo,
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
+ AMContainerAllocationExpirerUtil.getInstance().unregister(app.rmContext,
+ app);
app.rememberTargetTransitionsAndStoreState(event, transitionToDo,
targetedFinalState, stateToBeStored);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 2d6de3750e9..ae9e6b6af6f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -72,6 +72,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirerUtil;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
import org.apache.hadoop.yarn.server.resourcemanager.blacklist.BlacklistManager;
@@ -1232,6 +1233,8 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
ClusterMetrics.getMetrics().addAMContainerAllocationDelay(
allocationDelay);
appAttempt.storeAttempt();
+ AMContainerAllocationExpirerUtil.getInstance()
+ .unregister(appAttempt.rmContext, appAttempt);
return RMAppAttemptState.ALLOCATED_SAVING;
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
index cec9915e0d1..2540865a68a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java
@@ -74,6 +74,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.NodesListManager;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirerUtil;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
@@ -1365,6 +1366,18 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) {
LOG.info("Node " + rmNode.nodeId +
" reported UNHEALTHY with details: " +
remoteNodeHealthStatus.getHealthReport());
+ List runningApps = rmNode.getRunningApps();
+ Iterator iterator = runningApps.iterator();
+ while (iterator.hasNext()) {
+ ApplicationId appId = iterator.next();
+ if (!isAMLaunchedInSameRMNode(rmNode, appId)) {
+ iterator.remove();
+ }
+ }
+ AMContainerAllocationExpirerUtil.getInstance()
+ .registerWithAMContainerAllocationExpirer(rmNode.context,
+ runningApps);
+
// if a node in decommissioning receives an unhealthy report,
// it will stay in decommissioning.
if (isNodeDecommissioning) {
@@ -1401,6 +1414,25 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) {
return initialState;
}
+
+ /**
+ * Return true if AM launched in the same rmNode Else return false
+ *
+ * @param rmNode
+ * @param appId
+ * @return
+ */
+ private boolean isAMLaunchedInSameRMNode(RMNodeImpl rmNode,
+ ApplicationId appId) {
+ RMApp rmApp = rmNode.context.getRMApps().get(appId);
+ if (null != rmApp
+ && null != rmApp.getCurrentAppAttempt().getMasterContainer()
+ && rmNode.getNodeID().equals(
+ rmApp.getCurrentAppAttempt().getMasterContainer().getNodeId())) {
+ return true;
+ }
+ return false;
+ }
}
public static class StatusUpdateWhenUnHealthyTransition implements
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
index 4e5ff3f7687..eff69392689 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
@@ -75,6 +75,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
+import org.apache.hadoop.yarn.server.resourcemanager.amcontainer.AMContainerAllocationExpirer;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
@@ -245,7 +246,9 @@ public void setUp() throws Exception {
SecurityUtil.setAuthenticationMethod(authMethod, conf);
UserGroupInformation.setConfiguration(conf);
InlineDispatcher rmDispatcher = new InlineDispatcher();
-
+
+ AMContainerAllocationExpirer amContainerAllocationExpirer = mock(
+ AMContainerAllocationExpirer.class);
ContainerAllocationExpirer containerAllocationExpirer =
mock(ContainerAllocationExpirer.class);
amLivelinessMonitor = mock(AMLivelinessMonitor.class);
@@ -260,7 +263,8 @@ public void setUp() throws Exception {
new RMContainerTokenSecretManager(conf),
nmTokenManager,
clientToAMTokenManager);
-
+ ((RMContextImpl) rmContext)
+ .setAMContainerAllocationExpirer(amContainerAllocationExpirer);
store = mock(RMStateStore.class);
((RMContextImpl) rmContext).setStateStore(store);
publisher = mock(SystemMetricsPublisher.class);