diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 366bad0a4f2..7ecdf8f99ba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1981,7 +1981,7 @@ public Resource getTotalPendingResourcesConsideringUserLimit( Resource minpendingConsideringUserLimit = Resources.componentwiseMin( userNameToHeadroom.get(userName), pending); Resources.addTo(totalPendingConsideringUserLimit, - minpendingConsideringUserLimit); + pending); Resources.subtractFrom(userNameToHeadroom.get(userName), minpendingConsideringUserLimit); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemptToBalance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemptToBalance.java new file mode 100644 index 00000000000..a40b3280d65 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemptToBalance.java @@ -0,0 +1,220 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitorManager; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import java.util.ArrayList; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestCapacitySchedulerPreemptToBalance + extends CapacitySchedulerPreemptionTestBase { + + @Override + @Before + public void setUp() throws Exception { + conf = new CapacitySchedulerConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true); + conf.setClass(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES, + ProportionalCapacityPreemptionPolicy.class, SchedulingEditPolicy.class); + conf = (CapacitySchedulerConfiguration) getConfigurationWithMultipleQueues( + this.conf); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 100 * GB); + + // Set preemption related configurations + conf.setInt(CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, + 0); + conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND, + 1.0f); + conf.setFloat( + CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR, + 1.0f); + conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL, + 60000L); + mgr = new NullRMNodeLabelsManager(); + mgr.init(this.conf); + clock = mock(Clock.class); + when(clock.getTime()).thenReturn(0L); + } + + private Configuration getConfigurationWithMultipleQueues( + Configuration config) { + CapacitySchedulerConfiguration conf = + new CapacitySchedulerConfiguration(config); + + // Define top-level queues + conf.setQueues(CapacitySchedulerConfiguration.ROOT, + new String[] { "a", "b", "c", "d"}); + + final String A = CapacitySchedulerConfiguration.ROOT + ".a"; + conf.setCapacity(A, 30); + conf.setMaximumCapacity(A, 100); + conf.setUserLimitFactor(A, 100); + + final String B = CapacitySchedulerConfiguration.ROOT + ".b"; + conf.setCapacity(B, 30); + conf.setMaximumCapacity(B, 100); + 
conf.setUserLimitFactor(B, 300);
+
+    final String C = CapacitySchedulerConfiguration.ROOT + ".c";
+    conf.setCapacity(C, 30);
+    conf.setMaximumCapacity(C, 100);
+    conf.setUserLimitFactor(C, 300);
+
+    final String D = CapacitySchedulerConfiguration.ROOT + ".d";
+    conf.setCapacity(D, 10);
+    conf.setMaximumCapacity(D, 100);
+    conf.setUserLimitFactor(D, 100);
+
+    return conf;
+  }
+
+  @Test(timeout = 600000)
+  public void testPreemptionToBalanceUserLimitCap() throws Exception {
+    /**
+     * Test case: Submit three applications (app1/app2/app3) to different
+     * queues, queue structure:
+     *
+     * <pre>
+     *               Root
+     *            /  |  \  \
+     *           a   b   c  d
+     *          30  30  30  10
+     * </pre>
+     *
+     */
+
+    Logger.getRootLogger().setLevel(Level.DEBUG);
+    conf.setBoolean(
+        CapacitySchedulerConfiguration.PREEMPTION_TO_BALANCE_QUEUES_BEYOND_GUARANTEED,
+        true);
+    conf.setLong(
+        CapacitySchedulerConfiguration.MAX_WAIT_BEFORE_KILL_FOR_QUEUE_BALANCE_PREEMPTION,
+        2*1000);
+    CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(
+        this.conf);
+
+    MockRM rm1 = new MockRM(conf);
+    rm1.getRMContext().setNodeLabelManager(mgr);
+    rm1.start();
+
+    MockNM nm1 = rm1.registerNode("h1:1234", 100 * GB);
+    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
+    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
+
+    // launch app1 to queue a, AM container should be launched in nm1
+    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
+    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+
+    am1.allocate("*", 1 * GB, 9, new ArrayList<ContainerId>());
+
+    // Do allocation for node1
+    for (int i = 0; i < 9; i++) {
+      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+    }
+
+    // App1 should have 10 containers now (AM + 9 requested containers)
+    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(
+        am1.getApplicationAttemptId());
+    Assert.assertEquals(10, schedulerApp1.getLiveContainers().size());
+
+
+    // submit app2 to queue b
+    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b");
+    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
+
+    am2.allocate("*", 1 * GB, 39, new ArrayList<ContainerId>());
+
+    // Do allocation for node1
+    for (int i = 0; i < 39; i++) {
+      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+    }
+
+    // App2 should have 40 containers now (AM + 39 requested containers)
+    FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(
+        am2.getApplicationAttemptId());
+    Assert.assertEquals(40, schedulerApp2.getLiveContainers().size());
+
+
+    // submit app3 to queue c
+    RMApp app3 = rm1.submitApp(1 * GB, "app", "user", null, "c");
+    MockAM am3 = MockRM.launchAndRegisterAM(app3, rm1, nm1);
+
+    am3.allocate("*", 1 * GB, 49, new ArrayList<ContainerId>());
+
+    // Do allocation for node1
+    for (int i = 0; i < 49; i++) {
+      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
+    }
+
+    // App3 should have 50 containers now (AM + 49 requested containers)
+    FiCaSchedulerApp schedulerApp3 = cs.getApplicationAttempt(
+        am3.getApplicationAttemptId());
+    Assert.assertEquals(50, schedulerApp3.getLiveContainers().size());
+
+    // Trigger the preemption policy once now that all three queues have load
+    SchedulingMonitorManager smm = ((CapacityScheduler) rm1.
+        getResourceScheduler()).getSchedulingMonitorManager();
+    SchedulingMonitor smon = smm.getAvailableSchedulingMonitor();
+    ProportionalCapacityPreemptionPolicy editPolicy =
+        (ProportionalCapacityPreemptionPolicy) smon.getSchedulingEditPolicy();
+    editPolicy.editSchedule();
+
+    // ask for more resources from am1, am2 and am3 (queues a, b and c), and
+    // expect their pending resources to become 6G, 30G and 30G respectively
+
+    am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());
+    am2.allocate("*", 1 * GB, 30, new ArrayList<ContainerId>());
+    am3.allocate("*", 1 * GB, 30, new ArrayList<ContainerId>());
+
+    Assert.assertEquals(6 * 1024, schedulerApp1.getAppAttemptResourceUsage()
+        .getPending(RMNodeLabelsManager.NO_LABEL).getMemorySize());
+    Assert.assertEquals(30 * 1024, schedulerApp2.getAppAttemptResourceUsage()
+        .getPending(RMNodeLabelsManager.NO_LABEL).getMemorySize());
+    Assert.assertEquals(30 * 1024, schedulerApp3.getAppAttemptResourceUsage()
+        .getPending(RMNodeLabelsManager.NO_LABEL).getMemorySize());
+
+    rm1.close();
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java
index 800789af72a..d6191f4be7d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java
@@ -1111,116 +1111,4 @@ public void testPreemptionForFragmentatedCluster() throws Exception {
     rm1.close();
   }
 
-  @Test(timeout = 600000)
-  public void testPreemptionToBalanceWithCustomTimeout() throws Exception {
-    /**
-     * Test case: Submit two application (app1/app2) to different queues, queue
-     * structure:
-     *
-     * <pre>
-     *             Root
-     *            /  |  \
-     *           a   b   c
-     *          10   20  70
-     * 
- * - * 1) Two nodes (n1/n2) in the cluster, each of them has 20G. - * - * 2) app1 submit to queue-b, asks for 1G * 5 - * - * 3) app2 submit to queue-c, ask for one 4G container (for AM) - * - * After preemption, we should expect: - * 1. Preempt 4 containers from app1 - * 2. the selected containers will be killed after configured timeout. - * 3. AM of app2 successfully allocated. - */ - conf.setBoolean( - CapacitySchedulerConfiguration.PREEMPTION_TO_BALANCE_QUEUES_BEYOND_GUARANTEED, - true); - conf.setLong( - CapacitySchedulerConfiguration.MAX_WAIT_BEFORE_KILL_FOR_QUEUE_BALANCE_PREEMPTION, - 20*1000); - CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration( - this.conf); - - MockRM rm1 = new MockRM(conf); - rm1.getRMContext().setNodeLabelManager(mgr); - rm1.start(); - - MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB); - MockNM nm2 = rm1.registerNode("h2:1234", 20 * GB); - CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); - RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); - RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId()); - - // launch an app to queue, AM container should be launched in nm1 - RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a"); - MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); - - am1.allocate("*", 1 * GB, 38, new ArrayList()); - - // Do allocation for node1/node2 - for (int i = 0; i < 38; i++) { - cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); - cs.handle(new NodeUpdateSchedulerEvent(rmNode2)); - } - - // App1 should have 39 containers now - FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt( - am1.getApplicationAttemptId()); - Assert.assertEquals(39, schedulerApp1.getLiveContainers().size()); - // 20 from n1 and 19 from n2 - waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), - am1.getApplicationAttemptId(), 20); - waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), - am1.getApplicationAttemptId(), 19); - - - // Submit app2 to queue-c and asks for a 4G container for AM - RMApp app2 = rm1.submitApp(4 * GB, "app", "user", null, "c"); - FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt( - ApplicationAttemptId.newInstance(app2.getApplicationId(), 1)); - - // Call editSchedule: containers are selected to be preemption candidate - SchedulingMonitorManager smm = ((CapacityScheduler) rm1. 
-        getResourceScheduler()).getSchedulingMonitorManager();
-    SchedulingMonitor smon = smm.getAvailableSchedulingMonitor();
-    ProportionalCapacityPreemptionPolicy editPolicy =
-        (ProportionalCapacityPreemptionPolicy) smon.getSchedulingEditPolicy();
-    editPolicy.editSchedule();
-    Assert.assertEquals(4, editPolicy.getToPreemptContainers().size());
-
-    // check live containers immediately, nothing happen
-    Assert.assertEquals(39, schedulerApp1.getLiveContainers().size());
-
-    Thread.sleep(20*1000);
-    // Call editSchedule again: selected containers are killed
-    editPolicy.editSchedule();
-    waitNumberOfLiveContainersFromApp(schedulerApp1, 35);
-
-    // Call allocation, containers are reserved
-    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
-    cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
-    waitNumberOfReservedContainersFromApp(schedulerApp2, 1);
-
-    // Call editSchedule twice and allocation once, container should get allocated
-    editPolicy.editSchedule();
-    editPolicy.editSchedule();
-
-    int tick = 0;
-    while (schedulerApp2.getLiveContainers().size() != 1 && tick < 10) {
-      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
-      cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
-      tick++;
-      Thread.sleep(100);
-    }
-    waitNumberOfReservedContainersFromApp(schedulerApp2, 0);
-
-    rm1.close();
-
-
-  }
-
-
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
index b9bfc2aab58..97b6db2f755 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java
@@ -771,13 +771,14 @@ public void testPendingResourcesConsideringUserLimit() throws Exception {
 
     LeafQueue lq = (LeafQueue) cs.getQueue("default");
 
-    // UL = 8GB, so head room of u1 = 8GB - 2GB (AM) - 3GB (Reserved) = 3GB
-    // u2 = 8GB - 4GB = 4GB
-    // When not deduct reserved, total-pending = 3G (u1) + 4G (u2) = 7G
-    // deduct reserved, total-pending = 0G (u1) + 4G = 4G
-    Assert.assertEquals(7 * GB, lq.getTotalPendingResourcesConsideringUserLimit(
+    // Pending resources should no longer be capped at the user limit: think
+    // about a user asking for 10000 maps while the cluster can run at most
+    // 1000 at a time; as soon as each map finishes, another pending map gets
+    // scheduled. When not deducting reserved, total-pending =
+    // 3G (u1) + 20G (u2) = 23G; deducting reserved, 0G (u1) + 20G (u2) = 20G
+    Assert.assertEquals(23 * GB, lq.getTotalPendingResourcesConsideringUserLimit(
         Resources.createResource(20 * GB), "", false).getMemorySize());
-    Assert.assertEquals(4 * GB, lq.getTotalPendingResourcesConsideringUserLimit(
+    Assert.assertEquals(20 * GB, lq.getTotalPendingResourcesConsideringUserLimit(
         Resources.createResource(20 * GB), "", true).getMemorySize());
     rm1.close();
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
index 04bb7912d1c..79ba7d45383 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
@@ -3625,13 +3625,12 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser()
         e.assignContainers(clusterResource, node_0,
         new ResourceLimits(clusterResource),
         SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps);
-    // With queue capacity set at 1% of 100GB and user-limit-factor set to 1.0,
-    // all users (only user_0) queue 'e' should be able to consume 1GB.
-    // The first container should be assigned to app_0 with no headroom left
-    // even though user_0's apps are still asking for a total of 4GB.
+    // Pending resources are no longer capped at the user limit, so the queue
+    // can claim more resources to achieve queue balance after every queue
+    // has been satisfied with its ideal allocation.
     assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize());
     assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize());
-    assertEquals(0 * GB,
+    assertEquals(4 * GB,
         e.getTotalPendingResourcesConsideringUserLimit(clusterResource,
             NO_LABEL, false).getMemorySize());
 
@@ -3642,11 +3641,11 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser()
         SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps);
     // user_0 has no headroom due to user-limit-factor of 1.0. However capacity
     // scheduler will assign one container more than user-limit-factor.
-    // This container also went to app_0. Still with no neadroom even though
+    // This container also went to app_0. Still with no headroom even though
     // app_0 and app_1 are asking for a cumulative 3GB.
     assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize());
     assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize());
-    assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit(
+    assertEquals(3*GB, e.getTotalPendingResourcesConsideringUserLimit(
         clusterResource, NO_LABEL, false).getMemorySize());
 
     // Can't allocate 3rd container due to user-limit. Headroom still 0.
@@ -3656,7 +3655,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser()
         SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps);
     assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize());
     assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize());
-    assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit(
+    assertEquals(3*GB, e.getTotalPendingResourcesConsideringUserLimit(
         clusterResource, NO_LABEL, false).getMemorySize());
 
     // Increase user-limit-factor from 1GB to 10GB (1% * 10 * 100GB = 10GB).
@@ -3786,7 +3785,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers()
         TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true,
             priority, recordFactory)));
 
-    // Pending resource requests for user_1: app_2 and app_3 total 1GB.
+    // Pending resource requests for user_1: app_2 and app_3 total 3GB.
     priority = TestUtils.createMockPriority(1);
     app_2.updateResourceRequests(Collections.singletonList(
         TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true,
@@ -3797,9 +3796,10 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers()
             priority, recordFactory)));
 
     // Start testing...
-    // With queue capacity set at 1% of 100GB and user-limit-factor set to 1.0,
-    // queue 'e' should be able to consume 1GB per user.
-    assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit(
+    // Pending resources are no longer capped at the user limit, so the queue
+    // can claim more resources to achieve queue balance after every queue
+    // has been satisfied with its ideal allocation.
+    assertEquals(6*GB, e.getTotalPendingResourcesConsideringUserLimit(
         clusterResource, NO_LABEL, false).getMemorySize());
     // None of the apps have assigned resources
     // user_0's apps:
@@ -3814,9 +3814,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers()
         e.assignContainers(clusterResource, node_0,
         new ResourceLimits(clusterResource),
         SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps);
-    // The first container was assigned to user_0's app_0. Queues total headroom
-    // has 1GB left for user_1.
-    assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit(
+    // The first container was assigned to user_0's app_0. Queue's total
+    // pending resource becomes 5GB.
+    assertEquals(5*GB, e.getTotalPendingResourcesConsideringUserLimit(
         clusterResource, NO_LABEL, false).getMemorySize());
     // user_0's apps:
     assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize());
@@ -3832,9 +3832,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers()
         SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps);
     // user_0 has no headroom due to user-limit-factor of 1.0. However capacity
     // scheduler will assign one container more than user-limit-factor. So,
-    // this container went to user_0's app_1. so, headroom for queue 'e'e is
-    // still 1GB for user_1
-    assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit(
+    // this container went to user_0's app_1. So, total pending for queue 'e'
+    // becomes 4GB.
+    assertEquals(4*GB, e.getTotalPendingResourcesConsideringUserLimit(
         clusterResource, NO_LABEL, false).getMemorySize());
     // user_0's apps:
     assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize());
@@ -3850,7 +3850,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers()
         SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps);
     // Container was allocated to user_1's app_2 since user_1, Now, no headroom
     // is left.
- assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( + assertEquals(3*GB, e.getTotalPendingResourcesConsideringUserLimit( clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); @@ -3866,7 +3866,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Allocated to user_1's app_2 since scheduler allocates 1 container // above user resource limit. Available headroom still 0. - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( + assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit( clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: long app_0_consumption = app_0.getCurrentConsumption().getMemorySize(); @@ -3886,7 +3886,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Cannot allocate 5th container because both users are above their allowed // user resource limit. Values should be the same as previously. - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( + assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit( clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(app_0_consumption, app_0.getCurrentConsumption().getMemorySize());
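Note for reviewers (illustration only, not part of the patch): the one-line change in LeafQueue#getTotalPendingResourcesConsideringUserLimit is easiest to see with the numbers used in testPendingResourcesConsideringUserLimit above (u1: 3G headroom and 3G pending, u2: 4G headroom and 20G pending). The self-contained, plain-JDK sketch below reproduces both totals; the class and variable names are invented for this example and are not taken from the YARN code base.

// PendingVsUserLimitDemo.java -- hypothetical demo class, not YARN code.
public class PendingVsUserLimitDemo {
  public static void main(String[] args) {
    // {remaining user headroom (GB), pending ask (GB)} for u1 and u2, taken
    // from the expectations in testPendingResourcesConsideringUserLimit.
    long[][] users = { {3, 3}, {4, 20} };

    long cappedTotal = 0;    // old behavior: min(headroom, pending) per user
    long uncappedTotal = 0;  // new behavior: full pending per user
    for (long[] user : users) {
      long headroom = user[0];
      long pending = user[1];
      cappedTotal += Math.min(headroom, pending);
      uncappedTotal += pending;
    }

    // Old assertion: 7GB (3G from u1 + 4G from u2, capped at headroom).
    System.out.println("capped at user limit = " + cappedTotal + "G");
    // New assertion: 23GB (3G from u1 + 20G from u2, full pending demand).
    System.out.println("full pending demand  = " + uncappedTotal + "G");
  }
}

Counting the full pending demand is what lets preemption-to-balance (PREEMPTION_TO_BALANCE_QUEUES_BEYOND_GUARANTEED) see demand beyond each user's limit, which is what the new TestCapacitySchedulerPreemptToBalance test exercises.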