diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java index 4fb1862b887..c25a3a26870 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java @@ -83,10 +83,10 @@ public TempQueuePerPartition(String queueName, Resource current, if (queue instanceof LeafQueue) { LeafQueue l = (LeafQueue) queue; - pending = l.getTotalPendingResourcesConsideringUserLimit( + pending = l.getTotalPendingResourcesConsideringUserLimit(current, totalPartitionResource, partition, false); pendingDeductReserved = l.getTotalPendingResourcesConsideringUserLimit( - totalPartitionResource, partition, true); + current, totalPartitionResource, partition, true); leafQueue = l; } else { pending = Resources.createResource(0); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 366bad0a4f2..717ca1b8c90 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1948,7 +1948,7 @@ public void recoverContainer(Resource clusterResource, * excessive preemption. 
* @return Total pending resource considering user limit */ - public Resource getTotalPendingResourcesConsideringUserLimit( + public Resource getTotalPendingResourcesConsideringUserLimit(Resource usedRes, Resource clusterResources, String partition, boolean deductReservedFromPending) { try { @@ -1958,8 +1958,8 @@ public Resource getTotalPendingResourcesConsideringUserLimit( Resource totalPendingConsideringUserLimit = Resource.newInstance(0, 0); for (FiCaSchedulerApp app : getApplications()) { String userName = app.getUser(); + User user = getUser(userName); if (!userNameToHeadroom.containsKey(userName)) { - User user = getUser(userName); Resource headroom = Resources.subtract( getResourceLimitForActiveUsers(app.getUser(), clusterResources, partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), @@ -1985,6 +1985,9 @@ public Resource getTotalPendingResourcesConsideringUserLimit( Resources.subtractFrom(userNameToHeadroom.get(userName), minpendingConsideringUserLimit); } + totalPendingConsideringUserLimit = Resources.componentwiseMin( + Resources.subtract(clusterResources, usedRes), + totalPendingConsideringUserLimit); return totalPendingConsideringUserLimit; } finally { readLock.unlock(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java index 83ee6c09fcf..ef2121716d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java @@ -758,13 +758,8 @@ private Resource computeUserLimit(String userName, Resource clusterResource, - * User limit resource is determined by: max(currentCapacity / #activeUsers, - * currentCapacity * user-limit-percentage%) + * User limit resource is determined by: + * queueCapacity * user-limit-factor */ - Resource userLimitResource = Resources.max(resourceCalculator, - partitionResource, - Resources.divideAndCeil(resourceCalculator, resourceUsed, - usersSummedByWeight), - Resources.divideAndCeil(resourceCalculator, - Resources.multiplyAndRoundDown(currentCapacity, getUserLimit()), - 100)); + Resource userLimitResource = Resources.multiplyAndRoundDown(queueCapacity, + getUserLimitFactor()); // User limit is capped by maxUserLimit // - maxUserLimit = queueCapacity * user-limit-factor diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java index 64b56fba3ba..517fc41d94b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java +++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java @@ -756,11 +756,12 @@ private void setupQueue(CSQueue queue, String q, String[] queueExprArray, } if (!isParent(queueExprArray, idx)) { LeafQueue lq = (LeafQueue) queue; - when(lq.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class), - isA(String.class), eq(false))).thenReturn(pending); - when(lq.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class), - isA(String.class), eq(true))).thenReturn( - Resources.subtract(pending, reserved)); + when(lq.getTotalPendingResourcesConsideringUserLimit( + isA(Resource.class), isA(Resource.class), isA(String.class), + eq(false))).thenReturn(pending); + when(lq.getTotalPendingResourcesConsideringUserLimit( + isA(Resource.class), isA(Resource.class), isA(String.class), + eq(true))).thenReturn(Resources.subtract(pending, reserved)); } ru.setUsed(partitionName, parseResourceFromString(values[2].trim())); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java index 22a241f6fec..605d0954353 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java @@ -1318,10 +1318,12 @@ LeafQueue mockLeafQueue(ParentQueue p, Resource tot, int i, Resource[] abs, List appAttemptIdList = new ArrayList(); when(lq.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class), - isA(String.class), eq(false))).thenReturn(pending[i]); + isA(Resource.class), isA(String.class), eq(false))).thenReturn( + pending[i]); when(lq.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class), - isA(String.class), eq(true))).thenReturn(Resources.componentwiseMax( + isA(Resource.class), isA(String.class), eq(true))).thenReturn( + Resources.componentwiseMax( Resources.subtract(pending[i], reserved[i] == null ? 
Resources.none() : reserved[i]), Resources.none())); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index 24ae244a969..a0bb147f319 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -664,7 +664,7 @@ public void testHeadroom() throws Exception { // Schedule to compute queue.assignContainers(clusterResource, node_0, new ResourceLimits( clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); - Resource expectedHeadroom = Resources.createResource(5*16*GB, 1); + Resource expectedHeadroom = Resources.createResource(50*16*GB, 1); assertEquals(expectedHeadroom, app_0_0.getHeadroom()); // Submit second application from user_0, check headroom @@ -704,7 +704,7 @@ public void testHeadroom() throws Exception { // Schedule to compute queue.assignContainers(clusterResource, node_0, new ResourceLimits( clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); // Schedule to compute - expectedHeadroom = Resources.createResource(10*16*GB / 2, 1); // changes + expectedHeadroom = Resources.createResource(100*16*GB / 2, 1); // changes assertEquals(expectedHeadroom, app_0_0.getHeadroom()); assertEquals(expectedHeadroom, app_0_1.getHeadroom()); assertEquals(expectedHeadroom, app_1_0.getHeadroom()); @@ -720,7 +720,7 @@ public void testHeadroom() throws Exception { queue.getUsersManager().userLimitNeedsRecompute(); queue.assignContainers(clusterResource, node_0, new ResourceLimits( clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); // Schedule to compute - expectedHeadroom = Resources.createResource(9*16*GB / 2, 1); // changes + expectedHeadroom = Resources.createResource(90*16*GB / 2, 1); // changes assertEquals(expectedHeadroom, app_0_0.getHeadroom()); assertEquals(expectedHeadroom, app_0_1.getHeadroom()); assertEquals(expectedHeadroom, app_1_0.getHeadroom()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java index d73f1c84373..59d6994b3bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java @@ -675,9 +675,9 @@ public void testHeadroom() throws Exception { queue.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), 
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); - //head room = queue capacity = 50 % 90% 160 GB * 0.25 (UL) + //head room = queue capacity = 50% * 90% * 160 GB (ULF) Resource expectedHeadroom = - Resources.createResource((int) (0.5 * 0.9 * 160 * 0.25) * GB, 1); + Resources.createResource((int) (0.5 * 0.9 * 160) * GB, 1); assertEquals(expectedHeadroom, app_0_0.getHeadroom()); // Submit second application from user_0, check headroom @@ -708,7 +708,7 @@ public void testHeadroom() throws Exception { //head room for default label + head room for y partition //head room for y partition = 100% 50%(b queue capacity ) * 160 * GB Resource expectedHeadroomWithReqInY = Resources.add( - Resources.createResource((int) (0.25 * 0.5 * 160) * GB, 1), + Resources.createResource((int) (0.5 * 160) * GB, 1), expectedHeadroom); assertEquals(expectedHeadroomWithReqInY, app_0_1.getHeadroom()); @@ -735,11 +735,11 @@ public void testHeadroom() throws Exception { SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); // Schedule to compute //head room = queue capacity = (50 % 90% 160 GB)/2 (for 2 users) expectedHeadroom = - Resources.createResource((int) (0.5 * 0.9 * 160 * 0.25) * GB, 1); + Resources.createResource((int) (0.5 * 0.9 * 160) * GB, 1); //head room for default label + head room for y partition //head room for y partition = 100% 50%(b queue capacity ) * 160 * GB expectedHeadroomWithReqInY = Resources.add( - Resources.createResource((int) (0.25 * 0.5 * 160) * GB, 1), + Resources.createResource((int) (0.5 * 160) * GB, 1), expectedHeadroom); assertEquals(expectedHeadroom, app_0_0.getHeadroom()); assertEquals(expectedHeadroomWithReqInY, app_0_1.getHeadroom()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 8d948b57ba4..ab46f1b4f0f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -3366,9 +3366,9 @@ public void testHierarchyQueuesCurrentLimits() throws Exception { waitContainerAllocated(am1, 1 * GB, 1, 2, rm1, nm1); - // Maximum resoure of b1 is 100 * 0.895 * 0.792 = 71 GB - // 2 GBs used by am, so it's 71 - 2 = 69G. - Assert.assertEquals(69 * GB, + // Maximum resource of b1 is 100GB + // 2 GB used by the AM, so it's 100 - 2 = 98G.
+ Assert.assertEquals(98 * GB, am1.doHeartbeat().getAvailableResources().getMemorySize()); RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b2"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemptToBalance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemptToBalance.java new file mode 100644 index 00000000000..cc6fe4c6a96 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerPreemptToBalance.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitorManager; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import java.util.ArrayList; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestCapacitySchedulerPreemptToBalance + extends CapacitySchedulerPreemptionTestBase { + + @Override + @Before + public 
void setUp() throws Exception { + conf = new CapacitySchedulerConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true); + conf.setClass(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES, + ProportionalCapacityPreemptionPolicy.class, SchedulingEditPolicy.class); + conf = (CapacitySchedulerConfiguration) getConfigurationWithMultipleQueues( + this.conf); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 100 * GB); + + // Set preemption related configurations + conf.setInt(CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, + 0); + conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND, + 1.0f); + conf.setFloat( + CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR, + 1.0f); + conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL, + 60000L); + mgr = new NullRMNodeLabelsManager(); + mgr.init(this.conf); + clock = mock(Clock.class); + when(clock.getTime()).thenReturn(0L); + } + + private Configuration getConfigurationWithMultipleQueues( + Configuration config) { + CapacitySchedulerConfiguration conf = + new CapacitySchedulerConfiguration(config); + + // Define top-level queues + conf.setQueues(CapacitySchedulerConfiguration.ROOT, + new String[] { "a", "b", "c", "d"}); + + final String A = CapacitySchedulerConfiguration.ROOT + ".a"; + conf.setCapacity(A, 30); + conf.setMaximumCapacity(A, 100); + conf.setUserLimitFactor(A, 100); + + final String B = CapacitySchedulerConfiguration.ROOT + ".b"; + conf.setCapacity(B, 30); + conf.setMaximumCapacity(B, 100); + conf.setUserLimitFactor(B, 300); + + final String C = CapacitySchedulerConfiguration.ROOT + ".c"; + conf.setCapacity(C, 30); + conf.setMaximumCapacity(C, 100); + conf.setUserLimitFactor(C, 300); + + final String D = CapacitySchedulerConfiguration.ROOT + ".d"; + conf.setCapacity(D, 10); + conf.setMaximumCapacity(D, 100); + conf.setUserLimitFactor(D, 100); + + return conf; + } + + @Test(timeout = 600000) + public void testPreemptionToBalanceUserLimitCap() throws Exception { + /** + * Test case: Submit three applications (app1/app2/app3) to different + * queues, queue structure: + * + * <pre>
+     *               Root
+     *            /  |  \  \
+     *           a   b   c  d
+     *          30  30  30  10
+     * </pre>
+ * + * 1) Only one node (n1) in the cluster; it has 100G. + * + * 2) app1 is submitted to queue-a; it uses 10G and has 6G pending. + * + * 3) app2 is submitted to queue-b; it uses 40G and has 30G pending. + * + * 4) app3 is submitted to queue-c; it uses 50G and has 30G pending. + * + * With the user limit applied to pending resources, a, b and c should + * have 6GB, 30GB and 30GB pending respectively. + */ + + conf.setBoolean( + CapacitySchedulerConfiguration.PREEMPTION_TO_BALANCE_QUEUES_BEYOND_GUARANTEED, + true); + conf.setLong( + CapacitySchedulerConfiguration.MAX_WAIT_BEFORE_KILL_FOR_QUEUE_BALANCE_PREEMPTION, + 2*1000); + CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration( + this.conf); + + MockRM rm1 = new MockRM(conf); + rm1.getRMContext().setNodeLabelManager(mgr); + rm1.start(); + + MockNM nm1 = rm1.registerNode("h1:1234", 100 * GB); + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + // launch an app to queue-a; the AM container should be launched on nm1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + am1.allocate("*", 1 * GB, 9, new ArrayList()); + + // Do allocation for node1 + for (int i = 0; i < 9; i++) { + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + } + + // App1 should have 10 containers now + FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt( + am1.getApplicationAttemptId()); + Assert.assertEquals(10, schedulerApp1.getLiveContainers().size()); + + + // submit app2 to b + RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b"); + MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1); + + am2.allocate("*", 1 * GB, 39, new ArrayList()); + + // Do allocation for node1 + for (int i = 0; i < 39; i++) { + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + } + + // App2 should have 40 containers now + FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt( + am2.getApplicationAttemptId()); + Assert.assertEquals(40, schedulerApp2.getLiveContainers().size()); + + + // submit app3 to c + RMApp app3 = rm1.submitApp(1 * GB, "app", "user", null, "c"); + MockAM am3 = MockRM.launchAndRegisterAM(app3, rm1, nm1); + + am3.allocate("*", 1 * GB, 49, new ArrayList()); + + // Do allocation for node1 + for (int i = 0; i < 49; i++) { + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + } + + // App3 should have 50 containers now + FiCaSchedulerApp schedulerApp3 = cs.getApplicationAttempt( + am3.getApplicationAttemptId()); + Assert.assertEquals(50, schedulerApp3.getLiveContainers().size());
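At this point the policy's view of pending resources is what the LeafQueue hunk earlier in this patch changed: getTotalPendingResourcesConsideringUserLimit() now also caps the user-limit-aware pending total at whatever is still free in the cluster, via Resources.componentwiseMin(Resources.subtract(clusterResources, usedRes), ...). A minimal, memory-only sketch of that cap — plain longs stand in for Resource objects, and the class and method names here are illustrative, not from the patch:

// Memory-only sketch of the cap added in
// LeafQueue#getTotalPendingResourcesConsideringUserLimit.
// The real code applies Resources.componentwiseMin across every
// resource type; a single long stands in for a Resource here.
final class PendingCapSketch {
  static long cappedPending(long pendingConsideringUserLimit,
      long clusterResource, long queueUsed) {
    long free = clusterResource - queueUsed; // what could still be placed
    return Math.min(free, pendingConsideringUserLimit);
  }

  public static void main(String[] args) {
    long gb = 1024; // MB
    // Queue-b in the test above: 100G cluster, 40G already used by the
    // queue, 30G pending after the user-limit math.
    System.out.println(cappedPending(30 * gb, 100 * gb, 40 * gb)); // 30720
  }
}

With queue-b's numbers the free-cluster cap (60G) leaves the full 30G ask visible to the policy, which is what the pending assertions just below expect.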
+ + // Call editSchedule once so the preemption policy evaluates the queues + // with the user-limit-aware pending calculation + SchedulingMonitorManager smm = ((CapacityScheduler) rm1. + getResourceScheduler()).getSchedulingMonitorManager(); + SchedulingMonitor smon = smm.getAvailableSchedulingMonitor(); + ProportionalCapacityPreemptionPolicy editPolicy = + (ProportionalCapacityPreemptionPolicy) smon.getSchedulingEditPolicy(); + editPolicy.editSchedule(); + + // Request more containers from the existing apps in a, b and c; the + // pending resources should become 6G, 30G and 30G respectively + + am1.allocate("*", 1 * GB, 6, new ArrayList()); + am2.allocate("*", 1 * GB, 30, new ArrayList()); + am3.allocate("*", 1 * GB, 30, new ArrayList()); + + Assert.assertEquals(6 * 1024, schedulerApp1.getAppAttemptResourceUsage() + .getPending(RMNodeLabelsManager.NO_LABEL).getMemorySize()); + Assert.assertEquals(30 * 1024, schedulerApp2.getAppAttemptResourceUsage() + .getPending(RMNodeLabelsManager.NO_LABEL).getMemorySize()); + Assert.assertEquals(30 * 1024, schedulerApp3.getAppAttemptResourceUsage() + .getPending(RMNodeLabelsManager.NO_LABEL).getMemorySize()); + + rm1.close(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java index 800789af72a..d6191f4be7d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerSurgicalPreemption.java @@ -1111,116 +1111,4 @@ public void testPreemptionForFragmentatedCluster() throws Exception { rm1.close(); } - @Test(timeout = 600000) - public void testPreemptionToBalanceWithCustomTimeout() throws Exception { - /** - * Test case: Submit two application (app1/app2) to different queues, queue - * structure: - * - * <pre>
-     *             Root
-     *            /  |  \
-     *           a   b   c
-     *          10   20  70
-     * </pre>
- * - * 1) Two nodes (n1/n2) in the cluster, each of them has 20G. - * - * 2) app1 submit to queue-b, asks for 1G * 5 - * - * 3) app2 submit to queue-c, ask for one 4G container (for AM) - * - * After preemption, we should expect: - * 1. Preempt 4 containers from app1 - * 2. the selected containers will be killed after configured timeout. - * 3. AM of app2 successfully allocated. - */ - conf.setBoolean( - CapacitySchedulerConfiguration.PREEMPTION_TO_BALANCE_QUEUES_BEYOND_GUARANTEED, - true); - conf.setLong( - CapacitySchedulerConfiguration.MAX_WAIT_BEFORE_KILL_FOR_QUEUE_BALANCE_PREEMPTION, - 20*1000); - CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration( - this.conf); - - MockRM rm1 = new MockRM(conf); - rm1.getRMContext().setNodeLabelManager(mgr); - rm1.start(); - - MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB); - MockNM nm2 = rm1.registerNode("h2:1234", 20 * GB); - CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); - RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); - RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId()); - - // launch an app to queue, AM container should be launched in nm1 - RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a"); - MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); - - am1.allocate("*", 1 * GB, 38, new ArrayList()); - - // Do allocation for node1/node2 - for (int i = 0; i < 38; i++) { - cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); - cs.handle(new NodeUpdateSchedulerEvent(rmNode2)); - } - - // App1 should have 39 containers now - FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt( - am1.getApplicationAttemptId()); - Assert.assertEquals(39, schedulerApp1.getLiveContainers().size()); - // 20 from n1 and 19 from n2 - waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), - am1.getApplicationAttemptId(), 20); - waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), - am1.getApplicationAttemptId(), 19); - - - // Submit app2 to queue-c and asks for a 4G container for AM - RMApp app2 = rm1.submitApp(4 * GB, "app", "user", null, "c"); - FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt( - ApplicationAttemptId.newInstance(app2.getApplicationId(), 1)); - - // Call editSchedule: containers are selected to be preemption candidate - SchedulingMonitorManager smm = ((CapacityScheduler) rm1. 
- getResourceScheduler()).getSchedulingMonitorManager(); - SchedulingMonitor smon = smm.getAvailableSchedulingMonitor(); - ProportionalCapacityPreemptionPolicy editPolicy = - (ProportionalCapacityPreemptionPolicy) smon.getSchedulingEditPolicy(); - editPolicy.editSchedule(); - Assert.assertEquals(4, editPolicy.getToPreemptContainers().size()); - - // check live containers immediately, nothing happen - Assert.assertEquals(39, schedulerApp1.getLiveContainers().size()); - - Thread.sleep(20*1000); - // Call editSchedule again: selected containers are killed - editPolicy.editSchedule(); - waitNumberOfLiveContainersFromApp(schedulerApp1, 35); - - // Call allocation, containers are reserved - cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); - cs.handle(new NodeUpdateSchedulerEvent(rmNode2)); - waitNumberOfReservedContainersFromApp(schedulerApp2, 1); - - // Call editSchedule twice and allocation once, container should get allocated - editPolicy.editSchedule(); - editPolicy.editSchedule(); - - int tick = 0; - while (schedulerApp2.getLiveContainers().size() != 1 && tick < 10) { - cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); - cs.handle(new NodeUpdateSchedulerEvent(rmNode2)); - tick++; - Thread.sleep(100); - } - waitNumberOfReservedContainersFromApp(schedulerApp2, 0); - - rm1.close(); - - - } - - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index b9bfc2aab58..1f59ea7d217 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -775,10 +775,12 @@ public void testPendingResourcesConsideringUserLimit() throws Exception { // u2 = 8GB - 4GB = 4GB // When not deduct reserved, total-pending = 3G (u1) + 4G (u2) = 7G // deduct reserved, total-pending = 0G (u1) + 4G = 4G - Assert.assertEquals(7 * GB, lq.getTotalPendingResourcesConsideringUserLimit( - Resources.createResource(20 * GB), "", false).getMemorySize()); - Assert.assertEquals(4 * GB, lq.getTotalPendingResourcesConsideringUserLimit( - Resources.createResource(20 * GB), "", true).getMemorySize()); + Assert.assertEquals(7 * GB, + lq.getTotalPendingResourcesConsideringUserLimit(lq.getUsedResources(), + Resources.createResource(20 * GB), "", false).getMemorySize()); + Assert.assertEquals(4 * GB, + lq.getTotalPendingResourcesConsideringUserLimit(lq.getUsedResources(), + Resources.createResource(20 * GB), "", true).getMemorySize()); rm1.close(); } @@ -936,8 +938,8 @@ public void testUserLimitAllocationMultipleContainers() throws Exception { // container will be allocated to am1 // App1 will get 2 container allocated (plus AM container) cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); - Assert.assertEquals(101, schedulerApp1.getLiveContainers().size()); - Assert.assertEquals(100, schedulerApp2.getLiveContainers().size()); + Assert.assertEquals(200, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(1, 
schedulerApp2.getLiveContainers().size()); rm1.close(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 04bb7912d1c..1bce55d9bbd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -844,7 +844,7 @@ public void testDRFUserLimits() throws Exception { assertTrue("Verify user_0 got resources ", queueUser0.getUsed() .getMemorySize() > 0); - assertTrue("Verify user_1 got resources ", queueUser1.getUsed() - .getMemorySize() > 0); + assertTrue("Verify user_1 got no resources ", queueUser1.getUsed() + .getMemorySize() == 0); assertTrue( "Expected AbsoluteUsedCapacity > 0.95, got: " + b.getAbsoluteUsedCapacity(), b.getAbsoluteUsedCapacity() > 0.95); @@ -952,16 +952,15 @@ public void testUserLimits() throws Exception { assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - // Allocate one container to app_1. Even if app_0 - // submit earlier, it cannot get this container assigned since user_0 - // exceeded user-limit already. + // app_0 can get this container assigned since user_0 + // is under the user limit derived from its user-limit-factor applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps); - assertEquals(4*GB, a.getUsedResources().getMemorySize()); - assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); - assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); + assertEquals(6*GB, a.getUsedResources().getMemorySize()); + assertEquals(6*GB, app_0.getCurrentConsumption().getMemorySize()); + assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize());
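The jump from 3G straight to 6G for app_0 is the behavioural core of this patch: UsersManager#computeUserLimit no longer takes max(used / #activeUsers, currentCapacity * user-limit-percent) but simply queueCapacity * user-limit-factor. A standalone before/after sketch — plain longs, simplified rounding, illustrative names; the real code goes through Resources.divideAndCeil and Resources.multiplyAndRoundDown:

// Before/after user-limit arithmetic from UsersManager#computeUserLimit,
// reduced to memory-only longs with simplified rounding.
final class UserLimitSketch {
  // Old rule: max(resourceUsed / #activeUsers, currentCapacity * UL%)
  static long oldUserLimit(long resourceUsed, int activeUsers,
      long currentCapacity, int userLimitPercent) {
    long byActiveUsers =
        (long) Math.ceil((double) resourceUsed / activeUsers);
    long byPercent = currentCapacity * userLimitPercent / 100;
    return Math.max(byActiveUsers, byPercent);
  }

  // New rule in this patch: queueCapacity * user-limit-factor
  static long newUserLimit(long queueCapacity, float userLimitFactor) {
    return (long) (queueCapacity * userLimitFactor);
  }

  public static void main(String[] args) {
    long gb = 1024; // MB
    // TestApplicationLimitsByPartition headroom: queue capacity is
    // 50% of 90% of 160G = 72G. The old 25% user limit gave 18G; the
    // new rule with ULF = 1.0 yields the full 72G, matching the
    // updated expectations earlier in the patch.
    System.out.println(oldUserLimit(1 * gb, 1, 72 * gb, 25)); // 18432
    System.out.println(newUserLimit(72 * gb, 1.0f));          // 73728
  }
}

The same arithmetic explains the TestApplicationLimits and TestCapacityScheduler headroom updates above.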
// Allocate one container to app_0, before allocating this container, // user-limit = floor((5 + 1) / 2) = 3G. app_0's used resource (3G) <= @@ -1249,9 +1248,9 @@ qb.computeUserLimitAndSetHeadroom(app_3, clusterResource, "", SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY, null); assertEquals(4*GB, qb.getUsedResources().getMemorySize()); - //maxqueue 16G, userlimit 7G, used (by each user) 2G, headroom 5G (both) - assertEquals(5*GB, app_3.getHeadroom().getMemorySize()); - assertEquals(5*GB, app_1.getHeadroom().getMemorySize()); + //maxqueue 16G, userlimit 13G, used (by each user) 2G, headroom 11G (both) + assertEquals(11*GB, app_3.getHeadroom().getMemorySize()); + assertEquals(11*GB, app_1.getHeadroom().getMemorySize()); //test case 4 final ApplicationAttemptId appAttemptId_4 = TestUtils.getMockApplicationAttemptId(4, 0); @@ -1280,9 +1279,9 @@ public void testComputeUserLimitAndSetHeadroom() throws IOException { //testcase3 still active - 2+2+6=10 assertEquals(10*GB, qb.getUsedResources().getMemorySize()); //app4 is user 0 - //maxqueue 16G, userlimit 7G, used 8G, headroom 5G + //maxqueue 16G, userlimit 13G, used 8G, headroom 5G //(8G used is 6G from this test case - app4, 2 from last test case, app_1) - assertEquals(1*GB, app_4.getHeadroom().getMemorySize()); + assertEquals(5*GB, app_4.getHeadroom().getMemorySize()); } @Test @@ -1637,17 +1636,17 @@ public void testSingleQueueWithMultipleUsers() throws Exception { TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, priority, recordFactory))); - // Now allocations should goto app_2 since - // user_0 is at limit inspite of high user-limit-factor + // Now allocations should still go to app_0 since + // user_0 has a higher user-limit-factor a.setUserLimitFactor(10); applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps); - assertEquals(5*GB, a.getUsedResources().getMemorySize()); - assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); + assertEquals(3*GB, a.getUsedResources().getMemorySize()); + assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(3*GB, app_2.getCurrentConsumption().getMemorySize()); + assertEquals(0*GB, app_2.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_3.getCurrentConsumption().getMemorySize()); // Now allocations should goto app_0 since @@ -1656,10 +1655,10 @@ applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps); - assertEquals(6*GB, a.getUsedResources().getMemorySize()); - assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); + assertEquals(4*GB, a.getUsedResources().getMemorySize()); + assertEquals(4*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(3*GB, app_2.getCurrentConsumption().getMemorySize()); + assertEquals(0*GB, app_2.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_3.getCurrentConsumption().getMemorySize()); // Test max-capacity @@ -1671,14 +1670,14 @@ a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps); - assertEquals(6*GB, a.getUsedResources().getMemorySize()); - assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); + assertEquals(4*GB, a.getUsedResources().getMemorySize()); + assertEquals(4*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(3*GB, app_2.getCurrentConsumption().getMemorySize()); + assertEquals(0*GB, app_2.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_3.getCurrentConsumption().getMemorySize());
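The 11G and 5G headroom values asserted in testComputeUserLimitAndSetHeadroom above fall out of the usual headroom shape once the user limit moves from 7G to 13G: roughly min(userLimit, queueMaxCap) minus the user's consumption. A memory-only sketch with the test's numbers — simplified and with illustrative names; the real LeafQueue#computeUserLimitAndSetHeadroom also folds in cluster-wide availability:

// Rough headroom arithmetic behind the updated assertions in
// testComputeUserLimitAndSetHeadroom (GB as plain longs; simplified).
final class HeadroomSketch {
  static long headroom(long userLimit, long queueMaxCap, long userUsed) {
    return Math.max(Math.min(userLimit, queueMaxCap) - userUsed, 0);
  }

  public static void main(String[] args) {
    // maxqueue 16G, new user limit 13G:
    System.out.println(headroom(13, 16, 2)); // 11 -> the 11*GB assertions
    System.out.println(headroom(13, 16, 8)); // 5  -> the 5*GB assertion
  }
}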
// Revert max-capacity and user-limit-factor - // Now, allocations should goto app_3 since it's under user-limit + // Now, allocations should go to app_2 since it's under user-limit a.setMaxCapacity(1.0f); a.setUserLimitFactor(1); root.updateClusterResource(clusterResource, @@ -1688,21 +1687,21 @@ public void testSingleQueueWithMultipleUsers() throws Exception { new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps); assertEquals(7*GB, a.getUsedResources().getMemorySize()); - assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); + assertEquals(4*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(3*GB, app_2.getCurrentConsumption().getMemorySize()); - assertEquals(1*GB, app_3.getCurrentConsumption().getMemorySize()); + assertEquals(0*GB, app_3.getCurrentConsumption().getMemorySize()); - // Now we should assign to app_3 again since user_2 is under user-limit + // Now we should assign to app_3 since user_2 is under user-limit applyCSAssignment(clusterResource, a.assignContainers(clusterResource, node_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps); assertEquals(8*GB, a.getUsedResources().getMemorySize()); - assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); + assertEquals(4*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(3*GB, app_2.getCurrentConsumption().getMemorySize()); - assertEquals(2*GB, app_3.getCurrentConsumption().getMemorySize()); + assertEquals(1*GB, app_3.getCurrentConsumption().getMemorySize()); // 8. Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { @@ -1712,11 +1711,11 @@ public void testSingleQueueWithMultipleUsers() throws Exception { ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true); } - assertEquals(5*GB, a.getUsedResources().getMemorySize()); + assertEquals(4*GB, a.getUsedResources().getMemorySize()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(3*GB, app_2.getCurrentConsumption().getMemorySize()); - assertEquals(2*GB, app_3.getCurrentConsumption().getMemorySize()); + assertEquals(1*GB, app_3.getCurrentConsumption().getMemorySize()); // 9.
Release each container from app_2 for (RMContainer rmContainer : app_2.getLiveContainers()) { @@ -1726,11 +1725,11 @@ public void testSingleQueueWithMultipleUsers() throws Exception { ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL, null, true); } - assertEquals(2*GB, a.getUsedResources().getMemorySize()); + assertEquals(1*GB, a.getUsedResources().getMemorySize()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_2.getCurrentConsumption().getMemorySize()); - assertEquals(2*GB, app_3.getCurrentConsumption().getMemorySize()); + assertEquals(1*GB, app_3.getCurrentConsumption().getMemorySize()); // 10. Release each container from app_3 for (RMContainer rmContainer : app_3.getLiveContainers()) { @@ -3632,8 +3631,8 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0 * GB, - e.getTotalPendingResourcesConsideringUserLimit(clusterResource, - NO_LABEL, false).getMemorySize()); + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // Assign 2nd container of 1GB applyCSAssignment(clusterResource, @@ -3646,8 +3645,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() // app_0 and app_1 are asking for a cumulative 3GB. assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // Can't allocate 3rd container due to user-limit. Headroom still 0. applyCSAssignment(clusterResource, @@ -3656,16 +3656,18 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // Increase user-limit-factor from 1GB to 10GB (1% * 10 * 100GB = 10GB). // Pending for both app_0 and app_1 are still 3GB, so user-limit-factor // is no longer limiting the return value of // getTotalPendingResourcesConsideringUserLimit() e.setUserLimitFactor(10.0f); - assertEquals(3*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(3 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); applyCSAssignment(clusterResource, e.assignContainers(clusterResource, node_0, @@ -3674,8 +3676,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() // app_0 is now satisified, app_1 is still asking for 2GB. 
assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(2 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // Get the last 2 containers for app_1, no more pending requests. applyCSAssignment(clusterResource, @@ -3688,8 +3691,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_1.getCurrentConsumption().getMemorySize()); - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { @@ -3786,7 +3790,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, priority, recordFactory))); - // Pending resource requests for user_1: app_2 and app_3 total 1GB. + // Pending resource requests for user_1: app_2 and app_3 total 3GB. priority = TestUtils.createMockPriority(1); app_2.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true, @@ -3799,8 +3803,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // Start testing... // With queue capacity set at 1% of 100GB and user-limit-factor set to 1.0, // queue 'e' should be able to consume 1GB per user. - assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(2 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // None of the apps have assigned resources // user_0's apps: assertEquals(0*GB, app_0.getCurrentConsumption().getMemorySize()); @@ -3816,8 +3821,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // The first container was assigned to user_0's app_0. Queues total headroom // has 1GB left for user_1. - assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(1 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3834,8 +3840,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // scheduler will assign one container more than user-limit-factor. So, // this container went to user_0's app_1. 
so, headroom for queue 'e'e is // still 1GB for user_1 - assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(1 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3850,8 +3857,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Container was allocated to user_1's app_2 since user_1, Now, no headroom // is left. - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3866,8 +3874,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Allocated to user_1's app_2 since scheduler allocates 1 container // above user resource limit. Available headroom still 0. - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: long app_0_consumption = app_0.getCurrentConsumption().getMemorySize(); assertEquals(1*GB, app_0_consumption); @@ -3886,8 +3895,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Cannot allocate 5th container because both users are above their allowed // user resource limit. Values should be the same as previously. - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(app_0_consumption, app_0.getCurrentConsumption().getMemorySize()); assertEquals(app_1_consumption, app_1.getCurrentConsumption().getMemorySize()); @@ -3905,8 +3915,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Next container goes to user_0's app_1, since it still wanted 1GB. - assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(1 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3920,8 +3931,9 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Last container goes to user_1's app_3, since it still wanted 1GB. 
// user_0's apps: - assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, NO_LABEL, false).getMemorySize()); + assertEquals(0 * GB, + e.getTotalPendingResourcesConsideringUserLimit(e.getUsedResources(), + clusterResource, NO_LABEL, false).getMemorySize()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_1.getCurrentConsumption().getMemorySize()); // user_1's apps: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java index 1836919d404..9acbd5ec51d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java @@ -1075,8 +1075,8 @@ public RMNodeLabelsManager createNodeLabelManager() { // one more cycle before allocating to non-partitioned node Assert.assertEquals(10, cycleWaited); - // Both NM1/NM2 launched 10 containers, cluster resource is exhausted - checkLaunchedContainerNumOnNode(rm1, nm1.getNodeId(), 10); + // NM2 has 10 containers because of ULF=5; NM1 only has 6 since ULF=1 + checkLaunchedContainerNumOnNode(rm1, nm1.getNodeId(), 6); checkLaunchedContainerNumOnNode(rm1, nm2.getNodeId(), 10); rm1.close(); @@ -1214,14 +1214,15 @@ public RMNodeLabelsManager createNodeLabelManager() { cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); } - // app1 gets all resource in partition=x - Assert.assertEquals(10, schedulerNode1.getNumContainers()); + // queue a has a user-limit-factor of 1.0f, so app1 gets only part of the + // resource in partition=x regardless of user-limit-percent + Assert.assertEquals(6, schedulerNode1.getNumContainers()); // check non-exclusive containers of LeafQueue is correctly updated LeafQueue leafQueue = (LeafQueue) cs.getQueue("a"); Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey( "y")); - Assert.assertEquals(10, + Assert.assertEquals(6, leafQueue.getIgnoreExclusivityRMContainers().get("x").size()); // completes all containers of app1, ignoreExclusivityRMContainers should be