diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index 51849f8..87a5448 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -351,7 +351,7 @@ public RMContainer preemptContainer() { RMContainer toBePreempted = null; // If this queue is not over its fair share, reject - if (!preemptContainerPreCheck()) { + if (!canBePreempted()) { return toBePreempted; } @@ -534,16 +534,6 @@ public void setWeights(float weight) { } /** - * Helper method to check if the queue should preempt containers - * - * @return true if check passes (can preempt) or false otherwise - */ - private boolean preemptContainerPreCheck() { - return parent.getPolicy().checkIfUsageOverFairShare(getResourceUsage(), - getFairShare()); - } - - /** * Is a queue being starved for its min share. */ @VisibleForTesting diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java index fe0e3e2..e08b386 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java @@ -260,6 +260,16 @@ public RMContainer preemptContainer() { readLock.lock(); try { for (FSQueue queue : childQueues) { + // Skip selection for non-preemptable queue + if (!queue.canBePreempted()) { + if (LOG.isDebugEnabled()) { + LOG.debug("skipping from queue=" + getName() + + " because it's a non-preemptable queue or there is no" + + " sub-queues whose resource usage exceeds fair share."); + } + continue; + } + if (candidateQueue == null || comparator.compare(queue, candidateQueue) > 0) { candidateQueue = queue; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java index e4a2197..ee07c5b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java @@ -41,6 +41,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.util.resource.Resources; +import com.google.common.base.Preconditions; + @Private @Unstable public abstract class FSQueue implements Queue, Schedulable { @@ -236,6 +238,31 @@ public void setFairSharePreemptionThreshold(float fairSharePreemptionThreshold) } /** + * Recursively check if the queue can be preempted based on whether the + * resource usage is greater than fair share. + * + * @return true if the queue can be preempted + */ + public boolean canBePreempted() { + Preconditions.checkNotNull(parent, "Parent queue can't be null since" + + " parent's policy is needed for preemptable checking."); + + if (parent.policy.checkIfUsageOverFairShare( + getResourceUsage(), getFairShare())) { + return true; + } else { + // recursively find one queue which can be preempted + for (FSQueue queue: getChildQueues()) { + if (queue.canBePreempted()) { + return true; + } + } + } + + return false; + } + + /** * Recomputes the shares for all child queues and applications based on this * queue's current share */ diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 10f6c2b..b69b23a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -2036,10 +2036,82 @@ public void testPreemptionIsNotDelayedToNextRound() throws Exception { .getLeafQueue("queueA.queueA2", false), clock.getTime()); assertEquals(3277, toPreempt.getMemorySize()); - // verify if the 3 containers required by queueA2 are preempted in the same + // verify if the 4 containers required by queueA2 are preempted in the same // round scheduler.preemptResources(toPreempt); - assertEquals(3, scheduler.getSchedulerApp(app1).getPreemptionContainers() + assertEquals(4, scheduler.getSchedulerApp(app1).getPreemptionContainers() + .size()); + } + + @Test + public void testPreemptionFilterOutNonPreemptableQueues() throws Exception { + conf.setLong(FairSchedulerConfiguration.PREEMPTION_INTERVAL, 5000); + conf.setLong(FairSchedulerConfiguration.WAIT_TIME_BEFORE_KILL, 10000); + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "false"); + + ControlledClock clock = new ControlledClock(); + scheduler.setClock(clock); + + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println(""); + out.println(" "); + out.println(" "); + out.println(""); + out.println(""); + out.println(""); + out.println("10"); + out.println(".5"); + out.println(""); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add a node of 8 GB + RMNode node1 = MockNodes.newNodeInfo(1, + Resources.createResource(8 * GB, 8), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Run apps in queueA.A1 and queueB + ApplicationAttemptId app1 = createSchedulingRequest(1 * GB, 1, + "queueA.queueA1", "user1", 4, 1); + ApplicationAttemptId app2 = createSchedulingRequest(1 * GB, 1, "queueB", + "user2", 4, 1); + + scheduler.update(); + + NodeUpdateSchedulerEvent nodeUpdate1 = new NodeUpdateSchedulerEvent(node1); + for (int i = 0; i < 8; i++) { + scheduler.handle(nodeUpdate1); + } + + // verify if the apps got the containers they requested + assertEquals(4, scheduler.getSchedulerApp(app1).getLiveContainers().size()); + assertEquals(4, scheduler.getSchedulerApp(app2).getLiveContainers().size()); + + // Now submit an app in queueA.queueA2 + createSchedulingRequest(GB, 1, "queueA.queueA2", "user3", 2, 1); + scheduler.update(); + + // Let 11 sec pass + clock.tickSec(11); + + scheduler.update(); + Resource toPreempt = scheduler.resourceDeficit(scheduler.getQueueManager() + .getLeafQueue("queueA.queueA2", false), clock.getTime()); + assertEquals(2 * GB, toPreempt.getMemorySize()); + + // Verify if containers required by queueA2 are preempted from queueA1 + // instead of queueB + scheduler.preemptResources(toPreempt); + assertEquals(2, scheduler.getSchedulerApp(app1).getPreemptionContainers() + .size()); + assertEquals(0, scheduler.getSchedulerApp(app2).getPreemptionContainers() .size()); }