diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
index ca5a146..d4fcdb3 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
@@ -43,6 +43,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy;
import org.apache.hadoop.yarn.util.resource.Resources;
@Private
@@ -481,7 +482,9 @@ public ActiveUsersManager getActiveUsersManager() {
/**
* Check whether this queue can run this application master under the
- * maxAMShare limit
+ * maxAMShare limit for DRF policy.
+ * For the fifo and fair policies, additionally check whether the AM would
+ * take all available VCores, since those policies ignore VCore usage.
*
* @param amResource
* @return true if this queue can run
@@ -494,8 +497,34 @@ public boolean canRunAppAM(Resource amResource) {
}
Resource maxAMResource = Resources.multiply(getFairShare(), maxAMShare);
Resource ifRunAMResource = Resources.add(amResourceUsage, amResource);
- return !policy
- .checkIfAMResourceUsageOverLimit(ifRunAMResource, maxAMResource);
+
+ boolean overMaxAMShareLimit = policy
+ .checkIfAMResourceUsageOverLimit(ifRunAMResource, maxAMResource);
+
+ // The fair and fifo policies don't check VCore usage, so additionally
+ // check whether the AM would take all available VCores or go over
+ // maxResources, to avoid deadlock.
+ if (!overMaxAMShareLimit && !policy.equals(
+ SchedulingPolicy.getInstance(DominantResourceFairnessPolicy.class))) {
+ overMaxAMShareLimit = ifRunAMResource.getVirtualCores() >=
+ scheduler.getRootQueueMetrics().getAvailableVirtualCores() ||
+ isVCoreOverMaxResource(ifRunAMResource, this);
+ }
+
+ return !overMaxAMShareLimit;
+ }
+
+  private boolean isVCoreOverMaxResource(Resource res, FSQueue queue){
+    // Parent chain exhausted: no queue on the way up was at its VCore max.
+    if (queue == null) {
+      return false;
+    }
+    // Over the limit once res reaches this queue's configured max VCores.
+    if (res.getVirtualCores() >= scheduler.getAllocationConfiguration().
+        getMaxResources(queue.getName()).getVirtualCores()) {
+      return true;
+    }
+    // Otherwise apply the same check to the parent queue.
+    return isVCoreOverMaxResource(res, queue.getParent());
   }
public void addAMResourceUsage(Resource amResource) {
diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
index a75b5ce..3a4106e 100644
--- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
+++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
@@ -3286,6 +3286,7 @@ public void testQueueMaxAMShare() throws Exception {
@Test
public void testQueueMaxAMShareDefault() throws Exception {
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+ conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 6);
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("");
@@ -3301,6 +3302,8 @@ public void testQueueMaxAMShareDefault() throws Exception {
out.println("");
out.println("");
out.println("");
+ out.println(
+ "fair");
out.println("");
out.close();
@@ -3309,7 +3312,7 @@ public void testQueueMaxAMShareDefault() throws Exception {
scheduler.reinitialize(conf, resourceManager.getRMContext());
RMNode node =
- MockNodes.newNodeInfo(1, Resources.createResource(8192, 20),
+ MockNodes.newNodeInfo(1, Resources.createResource(8192, 10),
0, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node);
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
@@ -3377,6 +3380,28 @@ public void testQueueMaxAMShareDefault() throws Exception {
0, app2.getLiveContainers().size());
assertEquals("Queue2's AM resource usage should be 0 MB memory",
0, queue2.getAmResourceUsage().getMemory());
+
+ // Remove the app2
+ AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
+ new AppAttemptRemovedSchedulerEvent(attId2,
+ RMAppAttemptState.FINISHED, false);
+ scheduler.handle(appRemovedEvent2);
+ scheduler.update();
+
+ // AM3 passes the fair share check, but it would take all available VCores,
+ // so AM3 is not accepted.
+ ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
+ createApplicationWithAMResource(attId3, "queue3", "test1", amResource1);
+ createSchedulingRequestExistingApplication(1024, 6, amPriority, attId3);
+ FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
+ scheduler.update();
+ scheduler.handle(updateEvent);
+ assertEquals("Application2's AM resource shouldn't be updated",
+ 0, app3.getAMResource().getMemory());
+ assertEquals("Application2's AM should not be running",
+ 0, app3.getLiveContainers().size());
+ assertEquals("Queue2's AM resource usage should be 0 MB memory",
+ 0, queue3.getAmResourceUsage().getMemory());
}
/**