diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index dfde5ab..d01ec40 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -524,7 +524,7 @@ private Resource assignContainer( node.allocateContainer(allocatedContainer); // If this container is used to run AM, update the leaf queue's AM usage - if (getLiveContainers().size() == 1 && !getUnmanagedAM()) { + if (!isAmRunning() && !getUnmanagedAM()) { getQueue().addAMResourceUsage(container.getResource()); setAmRunning(true); } @@ -568,7 +568,7 @@ private Resource assignContainer(FSSchedulerNode node, boolean reserved) { addSchedulingOpportunity(priority); // Check the AM resource usage for the leaf queue - if (getLiveContainers().size() == 0 && !getUnmanagedAM()) { + if (!isAmRunning() && !getUnmanagedAM()) { if (!getQueue().canRunAppAM(getAMResource())) { return Resources.none(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index 3c97535..8059486 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -125,7 +125,11 @@ public boolean removeApp(FSAppAttempt app) { } // Update AM resource usage if needed - if (runnable && app.isAmRunning() && app.getAMResource() != null) { + // No need to check whether app.getAMResource() is null, + // because app.getAMResource() never returns null. + // If app.isAmRunning() is true, app.getUnmanagedAM() must be false, + // because AmRunning is set to true only when getUnmanagedAM() is false. + if (runnable && app.isAmRunning()) { Resources.subtractFrom(amResourceUsage, app.getAMResource()); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 04c7f70..606d937 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -902,8 +902,8 @@ public Allocation allocate(ApplicationAttemptId appAttemptId, application.recordContainerRequestTime(getClock().getTime()); // Set amResource for this app - if (!application.getUnmanagedAM() && ask.size() == 1 - && application.getLiveContainers().isEmpty()) { + if (!application.isAmRunning() && 
!application.getUnmanagedAM() + && ask.size() == 1) { application.setAMResource(ask.get(0).getCapability()); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 7600a35..c89e331 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -92,6 +92,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; @@ -3635,6 +3636,31 @@ public void testQueueMaxAMShare() throws Exception { assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemory()); + // request non-AM container for app5 + createSchedulingRequestExistingApplication(1024, 1, attId5); + assertEquals("Application5's AM should have 1 container", + 1, app5.getLiveContainers().size()); + // 
Complete the AM container before the non-AM container is allocated. + // Spark applications hit this situation. + RMContainer amContainer5 = (RMContainer)app5.getLiveContainers().toArray()[0]; + ContainerExpiredSchedulerEvent containerExpired = + new ContainerExpiredSchedulerEvent(amContainer5.getContainerId()); + scheduler.handle(containerExpired); + assertEquals("Application5's AM should have 0 container", + 0, app5.getLiveContainers().size()); + assertEquals("Queue1's AM resource usage should be 2048 MB memory", + 2048, queue1.getAmResourceUsage().getMemory()); + scheduler.update(); + scheduler.handle(updateEvent); + // The non-AM container should be allocated. + // Check that the non-AM container allocation is not rejected + // due to the queue MaxAMShare limitation. + assertEquals("Application5's AM should have 1 container", + 1, app5.getLiveContainers().size()); + // Check that non-AM container allocation doesn't affect queue AmResourceUsage + assertEquals("Queue1's AM resource usage should be 2048 MB memory", + 2048, queue1.getAmResourceUsage().getMemory()); + // Check amResource normalization ApplicationAttemptId attId6 = createAppAttemptId(6, 1); createApplicationWithAMResource(attId6, "queue1", "user1", amResource3);