From 216080dc62f1b99012c26f118a6dd678f482fa12 Mon Sep 17 00:00:00 2001 From: Sunil G Date: Sun, 7 May 2017 23:16:14 +0530 Subject: [PATCH] YARN-2113.0014 --- .../CapacitySchedulerPreemptionContext.java | 5 + .../capacity/CapacitySchedulerPreemptionUtils.java | 6 +- .../capacity/FifoIntraQueuePreemptionPlugin.java | 314 ++++--- .../capacity/IntraQueueCandidatesSelector.java | 110 ++- .../IntraQueuePreemptionComputePlugin.java | 10 +- .../ProportionalCapacityPreemptionPolicy.java | 23 +- .../monitor/capacity/TempAppPerPartition.java | 6 +- .../monitor/capacity/TempQueuePerPartition.java | 14 + .../monitor/capacity/TempUserPerPartition.java | 88 ++ .../capacity/CapacitySchedulerConfiguration.java | 8 + .../scheduler/capacity/LeafQueue.java | 11 +- .../scheduler/capacity/UsersManager.java | 11 +- ...ionalCapacityPreemptionPolicyMockFramework.java | 89 +- ...ortionalCapacityPreemptionPolicyIntraQueue.java | 30 +- ...apacityPreemptionPolicyIntraQueueUserLimit.java | 899 +++++++++++++++++++++ 15 files changed, 1474 insertions(+), 150 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java index 982b1f1..bc73a96 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java @@ -18,9 +18,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrder; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; @@ -63,4 +65,7 @@ TempQueuePerPartition getQueueByPartition(String queueName, float getMinimumThresholdForIntraQueuePreemption(); float getMaxAllowableLimitForIntraQueuePreemption(); + + @Unstable + IntraQueuePreemptionOrder getIntraQueuePreemptionOrder(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java index abad2a1..3e03d82 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java @@ -99,7 +99,7 @@ public static void deductPreemptableResourcesBasedSelectedCandidates( } deductPreemptableResourcePerApp(context, tq.totalPartitionResource, - tas, res, partition); + tas, res); } } } @@ -108,10 +108,10 @@ public static void deductPreemptableResourcesBasedSelectedCandidates( private static void deductPreemptableResourcePerApp( CapacitySchedulerPreemptionContext context, Resource totalPartitionResource, Collection tas, - Resource res, String partition) { + Resource res) { for (TempAppPerPartition ta : tas) { ta.deductActuallyToBePreempted(context.getResourceCalculator(), - totalPartitionResource, res, partition); + totalPartitionResource, res); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 5f1af1e..a280114 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -18,11 +18,13 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import 
java.util.List; import java.util.Map; import java.util.PriorityQueue; import java.util.Set; @@ -33,7 +35,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.IntraQueueCandidatesSelector.TAPriorityComparator; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrder; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; @@ -61,6 +65,26 @@ public FifoIntraQueuePreemptionPlugin(ResourceCalculator rc, } @Override + public Collection getPreemptableApps(String queueName, + String partition) { + TempQueuePerPartition tq = context.getQueueByPartition(queueName, + partition); + + List apps = new ArrayList(); + for (TempAppPerPartition tmpApp : tq.getApps()) { + // If a lower priority app was not selected to get preempted, mark such + // apps out from preemption candidate selection. 
+ if (Resources.equals(tmpApp.getActuallyToBePreempted(), + Resources.none())) { + continue; + } + + apps.add(tmpApp.app); + } + return apps; + } + + @Override public Map getResourceDemandFromAppsPerQueue( String queueName, String partition) { @@ -90,7 +114,7 @@ public FifoIntraQueuePreemptionPlugin(ResourceCalculator rc, @Override public void computeAppsIdealAllocation(Resource clusterResource, - Resource partitionBasedResource, TempQueuePerPartition tq, + TempQueuePerPartition tq, Map> selectedCandidates, Resource totalPreemptedResourceAllowed, Resource queueReassignableResource, float maxAllowablePreemptLimit) { @@ -113,17 +137,15 @@ public void computeAppsIdealAllocation(Resource clusterResource, // 3. Create all tempApps for internal calculation and return a list from // high priority to low priority order. - TAPriorityComparator taComparator = new TAPriorityComparator(); - PriorityQueue orderedByPriority = - createTempAppForResCalculation(tq.partition, apps, taComparator); + PriorityQueue orderedByPriority = createTempAppForResCalculation( + tq, apps, clusterResource, perUserAMUsed); // 4. Calculate idealAssigned per app by checking based on queue's // unallocated resource.Also return apps arranged from lower priority to // higher priority. - TreeSet orderedApps = - calculateIdealAssignedResourcePerApp(clusterResource, - partitionBasedResource, tq, selectedCandidates, - queueReassignableResource, orderedByPriority, perUserAMUsed); + TreeSet orderedApps = calculateIdealAssignedResourcePerApp( + clusterResource, tq, selectedCandidates, queueReassignableResource, + orderedByPriority); // 5. A configurable limit that could define an ideal allowable preemption // limit. Based on current queue's capacity,defined how much % could become @@ -154,7 +176,8 @@ public void computeAppsIdealAllocation(Resource clusterResource, // 8. There are chances that we may preempt for the demand from same // priority level, such cases are to be validated out. 
validateOutSameAppPriorityFromDemand(clusterResource, - (TreeSet) tq.getApps()); + (TreeSet) orderedApps, tq.getUsersPerPartition(), + context.getIntraQueuePreemptionOrder()); if (LOG.isDebugEnabled()) { LOG.debug("Queue Name:" + tq.queueName + ", partition:" + tq.partition); @@ -222,31 +245,25 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, * } * * @param clusterResource Cluster Resource - * @param partitionBasedResource resource per partition * @param tq TempQueue * @param selectedCandidates Already Selected preemption candidates * @param queueReassignableResource Resource used in a queue * @param orderedByPriority List of running apps - * @param perUserAMUsed AM used resource + * @param usersPerPartition AM used resource * @return List of temp apps ordered from low to high priority */ private TreeSet calculateIdealAssignedResourcePerApp( - Resource clusterResource, Resource partitionBasedResource, - TempQueuePerPartition tq, + Resource clusterResource, TempQueuePerPartition tq, Map> selectedCandidates, Resource queueReassignableResource, - PriorityQueue orderedByPriority, - Map perUserAMUsed) { + PriorityQueue orderedByPriority) { Comparator reverseComp = Collections .reverseOrder(new TAPriorityComparator()); TreeSet orderedApps = new TreeSet<>(reverseComp); - Map userIdealAssignedMapping = new HashMap<>(); String partition = tq.partition; - - Map preCalculatedUserLimit = - new HashMap(); + Map usersPerPartition = tq.getUsersPerPartition(); while (!orderedByPriority.isEmpty()) { // Remove app from the next highest remaining priority and process it to @@ -261,39 +278,13 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, } String userName = tmpApp.app.getUser(); - Resource userLimitResource = preCalculatedUserLimit.get(userName); - - // Verify whether we already calculated headroom for this user. 
- if (userLimitResource == null) { - userLimitResource = Resources.clone( - tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource, - partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); - - Resource amUsed = perUserAMUsed.get(userName); - if (null == amUsed) { - amUsed = Resources.createResource(0, 0); - } - - // Real AM used need not have to be considered for user-limit as well. - userLimitResource = Resources.subtract(userLimitResource, amUsed); - if (LOG.isDebugEnabled()) { - LOG.debug("Userlimit for user '" + userName + "' is :" - + userLimitResource + ", and amUsed is:" + amUsed); - } - - preCalculatedUserLimit.put(userName, userLimitResource); - } - - Resource idealAssignedForUser = userIdealAssignedMapping.get(userName); - - if (idealAssignedForUser == null) { - idealAssignedForUser = Resources.createResource(0, 0); - userIdealAssignedMapping.put(userName, idealAssignedForUser); - } + TempUserPerPartition tmpUser = usersPerPartition.get(userName); + Resource userLimitResource = tmpUser.getUserLimit(); + Resource idealAssignedForUser = tmpUser.idealAssigned; // Calculate total selected container resources from current app. - getAlreadySelectedPreemptionCandidatesResource(selectedCandidates, - tmpApp, partition); + getAlreadySelectedPreemptionCandidatesResource(selectedCandidates, tmpApp, + tmpUser, partition); // For any app, used+pending will give its idealAssigned. However it will // be tightly linked to queue's unallocated quota. 
So lower priority apps @@ -304,10 +295,11 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, if (Resources.lessThan(rc, clusterResource, idealAssignedForUser, userLimitResource)) { - appIdealAssigned = Resources.min(rc, clusterResource, appIdealAssigned, + Resource idealAssigned = Resources.min(rc, clusterResource, + appIdealAssigned, Resources.subtract(userLimitResource, idealAssignedForUser)); tmpApp.idealAssigned = Resources.clone(Resources.min(rc, - clusterResource, queueReassignableResource, appIdealAssigned)); + clusterResource, queueReassignableResource, idealAssigned)); Resources.addTo(idealAssignedForUser, tmpApp.idealAssigned); } else { continue; @@ -334,7 +326,8 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, */ private void getAlreadySelectedPreemptionCandidatesResource( Map> selectedCandidates, - TempAppPerPartition tmpApp, String partition) { + TempAppPerPartition tmpApp, TempUserPerPartition tmpUser, + String partition) { tmpApp.selected = Resources.createResource(0, 0); Set containers = selectedCandidates .get(tmpApp.app.getApplicationAttemptId()); @@ -346,16 +339,23 @@ private void getAlreadySelectedPreemptionCandidatesResource( for (RMContainer cont : containers) { if (partition.equals(cont.getNodeLabelExpression())) { Resources.addTo(tmpApp.selected, cont.getAllocatedResource()); + Resources.addTo(tmpUser.selected, cont.getAllocatedResource()); } } } private PriorityQueue createTempAppForResCalculation( - String partition, Collection apps, - TAPriorityComparator taComparator) { + TempQueuePerPartition tq, Collection apps, + Resource clusterResource, + Map perUserAMUsed) { + TAPriorityComparator taComparator = new TAPriorityComparator(); PriorityQueue orderedByPriority = new PriorityQueue<>( 100, taComparator); + String partition = tq.partition; + Map usersPerPartition = tq + .getUsersPerPartition(); + // have an internal temp app structure to store intermediate data(priority) for 
(FiCaSchedulerApp app : apps) { @@ -387,56 +387,156 @@ private void getAlreadySelectedPreemptionCandidatesResource( tmpApp.idealAssigned = Resources.createResource(0, 0); orderedByPriority.add(tmpApp); + + // Create a TempUserPerPartition structure to hold more information + // regarding each user's entities such as UserLimit etc. This could + // be kept in a user to TempUserPerPartition map for further reference. + String userName = app.getUser(); + if (!usersPerPartition.containsKey(userName)) { + ResourceUsage userResourceUsage = tq.leafQueue.getUser(userName) + .getResourceUsage(); + + TempUserPerPartition tmpUser = new TempUserPerPartition( + tq.leafQueue.getUser(userName), tq.queueName, + Resources.clone(userResourceUsage.getUsed(partition)), + Resources.clone(perUserAMUsed.get(userName)), + Resources.clone(userResourceUsage.getReserved(partition)), + Resources.none()); + + Resource userLimitResource = Resources.clone( + tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource, + partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); + + // Real AM used need not have to be considered for user-limit as well. + userLimitResource = Resources.subtract(userLimitResource, + tmpUser.amUsed); + tmpUser.setUserLimit(userLimitResource); + + if (LOG.isDebugEnabled()) { + LOG.debug("TempUser:" + tmpUser); + } + + tmpUser.idealAssigned = Resources.createResource(0, 0); + tq.addUserPerPartition(userName, tmpUser); + } } return orderedByPriority; } /* * Fifo+Priority based preemption policy need not have to preempt resources at - * same priority level. Such cases will be validated out. + * same priority level. Such cases will be validated out. But if the demand is + * from an app of different user, force to preempt resources even if apps are + * at same priority. 
*/ public void validateOutSameAppPriorityFromDemand(Resource cluster, - TreeSet appsOrderedfromLowerPriority) { + TreeSet orderedApps, + Map usersPerPartition, + IntraQueuePreemptionOrder intraQueuePreemptionOrder) { - TempAppPerPartition[] apps = appsOrderedfromLowerPriority - .toArray(new TempAppPerPartition[appsOrderedfromLowerPriority.size()]); + TempAppPerPartition[] apps = orderedApps + .toArray(new TempAppPerPartition[orderedApps.size()]); if (apps.length <= 0) { return; } - int lPriority = 0; - int hPriority = apps.length - 1; - - while (lPriority < hPriority - && !apps[lPriority].equals(apps[hPriority]) - && apps[lPriority].getPriority() < apps[hPriority].getPriority()) { - Resource toPreemptFromOther = apps[hPriority] - .getToBePreemptFromOther(); - Resource actuallyToPreempt = apps[lPriority].getActuallyToBePreempted(); - Resource delta = Resources.subtract(apps[lPriority].toBePreempted, - actuallyToPreempt); - - if (Resources.greaterThan(rc, cluster, delta, Resources.none())) { - Resource toPreempt = Resources.min(rc, cluster, - toPreemptFromOther, delta); - - apps[hPriority].setToBePreemptFromOther( - Resources.subtract(toPreemptFromOther, toPreempt)); - apps[lPriority].setActuallyToBePreempted( - Resources.add(actuallyToPreempt, toPreempt)); - } + for (int hPriority = apps.length - 1; hPriority >= 0; hPriority--) { - if (Resources.lessThanOrEqual(rc, cluster, - apps[lPriority].toBePreempted, - apps[lPriority].getActuallyToBePreempted())) { - lPriority++; - continue; - } + // Check whether high priority app with demand needs resource from other + // user. + if (Resources.greaterThan(rc, cluster, + apps[hPriority].getToBePreemptFromOther(), Resources.none())) { - if (Resources.equals(apps[hPriority].getToBePreemptFromOther(), - Resources.none())) { - hPriority--; - continue; + // Given we have a demand from a high priority app, we can do a reverse + // scan from lower priority apps to select resources. 
+ // Since idealAssigned of each app has considered user-limit, this logic + // will provide eventual consistency w.r.t user-limit as well. + for (int lPriority = 0; lPriority < apps.length; lPriority++) { + + // Check whether app with demand needs resource from other user. + if (Resources.greaterThan(rc, cluster, apps[lPriority].toBePreempted, + Resources.none())) { + + // If apps are of same user, and priority is same, then skip. + if ((apps[hPriority].getUser().equals(apps[lPriority].getUser())) + && (apps[lPriority].getPriority() >= apps[hPriority] + .getPriority())) { + continue; + } + + if (Resources.lessThanOrEqual(rc, cluster, + apps[lPriority].toBePreempted, + apps[lPriority].getActuallyToBePreempted()) + || Resources.equals(apps[hPriority].getToBePreemptFromOther(), + Resources.none())) { + continue; + } + + // Ideally if any application has a higher priority, then it can + // force to preempt any lower priority app from any user. However + // if admin enforces user-limit over priority, preemption module + // will not choose lower priority apps from usre's who are not yet + // met its user-limit. + TempUserPerPartition tmpUser = usersPerPartition + .get(apps[lPriority].getUser()); + if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser())) + && (!tmpUser.isUserLimitReached(rc, cluster)) + && (intraQueuePreemptionOrder + .equals(IntraQueuePreemptionOrder.USERLIMIT_FIRST))) { + continue; + } + + Resource toPreemptFromOther = apps[hPriority] + .getToBePreemptFromOther(); + Resource actuallyToPreempt = apps[lPriority] + .getActuallyToBePreempted(); + + // A lower priority app could offer more resource to preempt, if + // multiple higher priority/under served users needs resources. + // After one iteration, we need to ensure that actuallyToPreempt is + // subtracted from the resource to preempt. 
+ Resource preemptableFromLowerPriorityApp = Resources + .subtract(apps[lPriority].toBePreempted, actuallyToPreempt); + + // In case of user-limit preemption, when app's are from different + // user and of same priority, we will do user-limit preemption if + // there is a demand from under UL quota app. + // However this under UL quota app's demand may be more. + // Still we should ensure that we are not doing over preemption such + // that only a maximum of (user's used - UL quota) could be + // preempted. + if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser())) + && (apps[lPriority].getPriority() == apps[hPriority] + .getPriority()) + && tmpUser.isUserLimitReached(rc, cluster)) { + + Resource deltaULQuota = Resources + .subtract(tmpUser.getUsedDeductAM(), tmpUser.selected); + Resources.subtractFrom(deltaULQuota, tmpUser.getUserLimit()); + + if (tmpUser.isPreemptionQuotaForULDeltaDone()) { + deltaULQuota = Resources.createResource(0, 0); + } + + if (Resources.lessThan(rc, cluster, deltaULQuota, + preemptableFromLowerPriorityApp)) { + tmpUser.updatePreemptionQuotaForULDeltaAsDone(true); + preemptableFromLowerPriorityApp = deltaULQuota; + } + } + + if (Resources.greaterThan(rc, cluster, + preemptableFromLowerPriorityApp, Resources.none())) { + Resource toPreempt = Resources.min(rc, cluster, + toPreemptFromOther, preemptableFromLowerPriorityApp); + + apps[hPriority].setToBePreemptFromOther( + Resources.subtract(toPreemptFromOther, toPreempt)); + apps[lPriority].setActuallyToBePreempted( + Resources.add(actuallyToPreempt, toPreempt)); + } + } + } } } } @@ -456,6 +556,40 @@ private Resource calculateUsedAMResourcesPerQueue(String partition, Resources.addTo(userAMResource, app.getAMResource(partition)); Resources.addTo(amUsed, app.getAMResource(partition)); } + return amUsed; } + + @Override + public boolean skipContainerBasedOnIntraQueuePolicy(FiCaSchedulerApp app, + Resource clusterResource, Resource usedResource, RMContainer c) { + // Ensure below checks 
+ // 1. This check must be done only when preemption order is USERLIMIT_FIRST + // 2. By selecting container "c", check whether this user's resource usage + // is going below its user-limit. + // 3. Used resource of user must be always greater than user-limit to + // skip some containers as per this check. If used resource is under user + // limit, then these containers of this user has to be preempted as demand + // might be due to high priority apps running in same user. + String partition = context.getScheduler() + .getSchedulerNode(c.getAllocatedNode()).getPartition(); + TempQueuePerPartition tq = context.getQueueByPartition(app.getQueueName(), + partition); + TempUserPerPartition tmpUser = tq.getUsersPerPartition().get(app.getUser()); + + // Given user is not present, skip the check. + if (tmpUser == null) { + return false; + } + + // For ideal resource computations, user-limit got saved by subtracting am + // used resource in TempUser. Hence it has to be added back here for + // complete check. 
+ Resource userLimit = Resources.add(tmpUser.getUserLimit(), tmpUser.amUsed); + + return Resources.lessThanOrEqual(rc, clusterResource, + Resources.subtract(usedResource, c.getAllocatedResource()), userLimit) + && context.getIntraQueuePreemptionOrder() + .equals(IntraQueuePreemptionOrder.USERLIMIT_FIRST); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java index 2890414..fe26434 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java @@ -23,6 +23,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrder; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; @@ -31,8 +32,9 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.Comparator; -import java.util.Iterator; +import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ 
-51,14 +53,14 @@ Comparator { @Override - public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) { - Priority p1 = Priority.newInstance(tq1.getPriority()); - Priority p2 = Priority.newInstance(tq2.getPriority()); + public int compare(TempAppPerPartition ta1, TempAppPerPartition ta2) { + Priority p1 = Priority.newInstance(ta1.getPriority()); + Priority p2 = Priority.newInstance(ta2.getPriority()); if (!p1.equals(p2)) { return p1.compareTo(p2); } - return tq1.getApplicationId().compareTo(tq2.getApplicationId()); + return ta1.getApplicationId().compareTo(ta2.getApplicationId()); } } @@ -121,17 +123,27 @@ public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) { Map resToObtainByPartition = fifoPreemptionComputePlugin .getResourceDemandFromAppsPerQueue(queueName, partition); - // 6. Based on the selected resource demand per partition, select + // Default preemption iterator considers only FIFO+priority. For + // userlimit preemption, its possible that some lower priority apps + // needs from high priority app of another user. Hence use apps + // ordered by userlimit starvation as well. + Collection apps = fifoPreemptionComputePlugin + .getPreemptableApps(queueName, partition); + + // 6. Get user-limit to ensure that we do not preempt resources which + // will force user's resource to come under its UL. + Map rollingResourceUsagePerUser = new HashMap<>(); + initializeUsageAndUserLimitForCompute(clusterResource, partition, + leafQueue, rollingResourceUsagePerUser); + + // 7. Based on the selected resource demand per partition, select // containers with known policy from inter-queue preemption. 
try { leafQueue.getReadLock().lock(); - Iterator desc = leafQueue.getOrderingPolicy() - .getPreemptionIterator(); - while (desc.hasNext()) { - FiCaSchedulerApp app = desc.next(); - preemptFromLeastStarvedApp(selectedCandidates, clusterResource, - totalPreemptedResourceAllowed, resToObtainByPartition, - leafQueue, app); + for (FiCaSchedulerApp app : apps) { + preemptFromLeastStarvedApp(leafQueue, app, selectedCandidates, + clusterResource, totalPreemptedResourceAllowed, + resToObtainByPartition, rollingResourceUsagePerUser); } } finally { leafQueue.getReadLock().unlock(); @@ -142,16 +154,30 @@ public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) { return selectedCandidates; } - private void preemptFromLeastStarvedApp( + private void initializeUsageAndUserLimitForCompute(Resource clusterResource, + String partition, LeafQueue leafQueue, + Map rollingResourceUsagePerUser) { + for (String user : leafQueue.getAllUsers()) { + // Initialize used resource of a given user for rolling computation. + rollingResourceUsagePerUser.put(user, Resources.clone( + leafQueue.getUser(user).getResourceUsage().getUsed(partition))); + if (LOG.isDebugEnabled()) { + LOG.debug("Rolling resource usage for user:" + user + " is : " + + rollingResourceUsagePerUser.get(user)); + } + } + } + + private void preemptFromLeastStarvedApp(LeafQueue leafQueue, + FiCaSchedulerApp app, Map> selectedCandidates, Resource clusterResource, Resource totalPreemptedResourceAllowed, - Map resToObtainByPartition, LeafQueue leafQueue, - FiCaSchedulerApp app) { + Map resToObtainByPartition, + Map rollingResourceUsagePerUser) { // ToDo: Reuse reservation selector here. 
- List liveContainers = new ArrayList<>( - app.getLiveContainers()); + List liveContainers = new ArrayList<>(app.getLiveContainers()); sortContainers(liveContainers); if (LOG.isDebugEnabled()) { @@ -160,6 +186,8 @@ private void preemptFromLeastStarvedApp( + totalPreemptedResourceAllowed); } + Resource rollingUsedResourcePerUser = rollingResourceUsagePerUser + .get(app.getUser()); for (RMContainer c : liveContainers) { // if there are no demand, return. @@ -184,12 +212,34 @@ private void preemptFromLeastStarvedApp( continue; } + // If selected container brings down resource usage under its user's + // UserLimit (or equals to), we must skip such containers. + if (fifoPreemptionComputePlugin.skipContainerBasedOnIntraQueuePolicy(app, + clusterResource, rollingUsedResourcePerUser, c)) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Skipping container: " + c.getContainerId() + " with resource:" + + c.getAllocatedResource() + " as UserLimit for user:" + + app.getUser() + " with resource usage: " + + rollingUsedResourcePerUser + " is going under UL"); + } + break; + } + // Try to preempt this container - CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain( - rc, preemptionContext, resToObtainByPartition, c, clusterResource, - selectedCandidates, totalPreemptedResourceAllowed); + boolean ret = CapacitySchedulerPreemptionUtils + .tryPreemptContainerAndDeductResToObtain(rc, preemptionContext, + resToObtainByPartition, c, clusterResource, selectedCandidates, + totalPreemptedResourceAllowed); + + // Subtract from respective user's resource usage once a container is + // selected for preemption. 
+ if (ret && preemptionContext.getIntraQueuePreemptionOrder() + .equals(IntraQueuePreemptionOrder.USERLIMIT_FIRST)) { + Resources.subtractFrom(rollingUsedResourcePerUser, + c.getAllocatedResource()); + } } - } private void computeIntraQueuePreemptionDemand(Resource clusterResource, @@ -205,12 +255,7 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource, continue; } - // 2. Its better to get partition based resource limit earlier before - // starting calculation - Resource partitionBasedResource = - context.getPartitionResource(partition); - - // 3. loop through all queues corresponding to a partition. + // 2. loop through all queues corresponding to a partition. for (String queueName : queueNames) { TempQueuePerPartition tq = context.getQueueByPartition(queueName, partition); @@ -221,23 +266,22 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource, continue; } - // 4. Consider reassignableResource as (used - actuallyToBePreempted). + // 3. Consider reassignableResource as (used - actuallyToBePreempted). // This provides as upper limit to split apps quota in a queue. Resource queueReassignableResource = Resources.subtract(tq.getUsed(), tq.getActuallyToBePreempted()); - // 5. Check queue's used capacity. Make sure that the used capacity is + // 4. Check queue's used capacity. Make sure that the used capacity is // above certain limit to consider for intra queue preemption. if (leafQueue.getQueueCapacities().getUsedCapacity(partition) < context .getMinimumThresholdForIntraQueuePreemption()) { continue; } - // 6. compute the allocation of all apps based on queue's unallocated + // 5. 
compute the allocation of all apps based on queue's unallocated // capacity fifoPreemptionComputePlugin.computeAppsIdealAllocation(clusterResource, - partitionBasedResource, tq, selectedCandidates, - totalPreemptedResourceAllowed, + tq, selectedCandidates, totalPreemptedResourceAllowed, queueReassignableResource, context.getMaxAllowableLimitForIntraQueuePreemption()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java index 93ebe65..56fd007 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java @@ -18,12 +18,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; +import java.util.Collection; import java.util.Map; import java.util.Set; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; interface IntraQueuePreemptionComputePlugin { @@ -32,8 +34,14 @@ String partition); void computeAppsIdealAllocation(Resource clusterResource, - Resource partitionBasedResource, TempQueuePerPartition tq, + TempQueuePerPartition tq, Map> selectedCandidates, Resource totalPreemptedResourceAllowed, Resource 
queueTotalUnassigned, float maxAllowablePreemptLimit); + + Collection getPreemptableApps(String queueName, + String partition); + + boolean skipContainerBasedOnIntraQueuePolicy(FiCaSchedulerApp app, + Resource clusterResource, Resource usedResource, RMContainer c); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java index dc6f1c2..7ff8945 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java @@ -22,6 +22,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -79,6 +80,16 @@ */ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolicy, CapacitySchedulerPreemptionContext { + + /** + * IntraQueuePreemptionOrder will be used to define various priority orders + * which could be configured by admin. 
+ */ + @Unstable + public enum IntraQueuePreemptionOrder { + PRIORITY_FIRST, USERLIMIT_FIRST; + } + private static final Log LOG = LogFactory.getLog(ProportionalCapacityPreemptionPolicy.class); @@ -95,6 +106,7 @@ private float maxAllowableLimitForIntraQueuePreemption; private float minimumThresholdForIntraQueuePreemption; + private IntraQueuePreemptionOrder intraQueuePreemptionOrder; // Pointer to other RM components private RMContext rmContext; @@ -190,6 +202,11 @@ public void init(Configuration config, RMContext context, CapacitySchedulerConfiguration. DEFAULT_INTRAQUEUE_PREEMPTION_MINIMUM_THRESHOLD); + intraQueuePreemptionOrder = IntraQueuePreemptionOrder.valueOf(csConfig + .get(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + CapacitySchedulerConfiguration.DEFAULT_INTRAQUEUE_PREEMPTION_ORDER) + .toUpperCase()); + rc = scheduler.getResourceCalculator(); nlm = scheduler.getRMContext().getNodeLabelManager(); @@ -242,7 +259,6 @@ public synchronized void editSchedule() { } } - @SuppressWarnings("unchecked") private void preemptOrkillSelectedContainerAfterWait( Map> selectedCandidates, long currentTime) { @@ -652,4 +668,9 @@ public void addPartitionToUnderServedQueues(String queueName, } underServedQueues.add(queueName); } + + @Override + public IntraQueuePreemptionOrder getIntraQueuePreemptionOrder() { + return intraQueuePreemptionOrder; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java index fccd2a7..e9a934b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java @@ -91,8 +91,12 @@ public ApplicationId getApplicationId() { return applicationId; } + public String getUser() { + return this.app.getUser(); + } + public void deductActuallyToBePreempted(ResourceCalculator resourceCalculator, - Resource cluster, Resource toBeDeduct, String partition) { + Resource cluster, Resource toBeDeduct) { if (Resources.greaterThan(resourceCalculator, cluster, getActuallyToBePreempted(), toBeDeduct)) { Resources.subtractFrom(getActuallyToBePreempted(), toBeDeduct); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java index 7eab015..89452f9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java @@ -26,6 +26,8 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.Map; /** * Temporary data-structure tracking resource availability, pending resource @@ -59,6 +61,10 @@ int relativePriority = 0; TempQueuePerPartition parent = null; + // This will hold a temp user data structure and will hold userlimit, + // idealAssigned, used etc. 
+ Map usersPerPartition = new LinkedHashMap<>(); + TempQueuePerPartition(String queueName, Resource current, boolean preemptionDisabled, String partition, Resource killable, float absCapacity, float absMaxCapacity, Resource totalPartitionResource, @@ -289,4 +295,12 @@ public void addAllApps(Collection orderedApps) { return apps; } + public void addUserPerPartition(String userName, + TempUserPerPartition tmpUser) { + this.usersPerPartition.put(userName, tmpUser); + } + + public Map getUsersPerPartition() { + return usersPerPartition; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java new file mode 100644 index 0000000..33ee18f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; + + +/** + * Temporary data-structure tracking resource availability, pending resource + * need, current utilization for an application. + */ +public class TempUserPerPartition extends AbstractPreemptionEntity { + + private final User user; + private Resource userLimit; + private boolean donePreemptionQuotaForULDelta = false; + + TempUserPerPartition(User user, String queueName, Resource usedPerPartition, + Resource amUsedPerPartition, Resource reserved, + Resource pendingPerPartition) { + super(queueName, usedPerPartition, amUsedPerPartition, reserved, + pendingPerPartition); + this.user = user; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" NAME: " + getUserName()).append(" CUR: ").append(getUsed()) + .append(" PEN: ").append(pending).append(" RESERVED: ").append(reserved) + .append(" AM_USED: ").append(amUsed).append(" USER_LIMIT: ") + .append(getUserLimit()).append(" IDEAL_ASSIGNED: ") + .append(idealAssigned).append(" USED_WO_AMUSED: ") + .append(getUsedDeductAM()).append(" IDEAL_PREEMPT: ") + .append(toBePreempted).append(" ACTUAL_PREEMPT: ") + .append(getActuallyToBePreempted()).append("\n"); + + return sb.toString(); + } + + public String getUserName() { + return user.getUserName(); + } + + public Resource getUserLimit() { + return userLimit; + } + + public void setUserLimit(Resource userLimitResource) { + this.userLimit = userLimitResource; + } + + public boolean isUserLimitReached(ResourceCalculator rc, + Resource clusterResource) { + if 
(Resources.greaterThan(rc, clusterResource, getUsedDeductAM(), + userLimit)) { + return true; + } + return false; + } + + public boolean isPreemptionQuotaForULDeltaDone() { + return this.donePreemptionQuotaForULDelta; + } + + public void updatePreemptionQuotaForULDeltaAsDone(boolean done) { + this.donePreemptionQuotaForULDelta = done; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index 9fb92ec..24efedb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -1234,6 +1234,14 @@ public boolean getLazyPreemptionEnabled() { 0.2f; /** + * For intra-queue preemption, enforce a preemption order such as + * "userlimit_first" or "priority_first". + */ + public static final String INTRAQUEUE_PREEMPTION_ORDER = PREEMPTION_CONFIG_PREFIX + + INTRA_QUEUE_PREEMPTION_CONFIG_PREFIX + "preemption-order"; + public static final String DEFAULT_INTRAQUEUE_PREEMPTION_ORDER = "userlimit_first"; + + /** * Maximum application for a queue to be used when application per queue is * not defined.To be consistent with previous version the default value is set * as UNDEFINED. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 9059ef0..104e95e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -43,12 +43,10 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.security.AccessType; -import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; @@ -56,7 +54,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.*; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; @@ -2022,4 +2019,12 @@ public void stopQueue() { writeLock.unlock(); } } + + /** + * Get all valid users in this queue. + * @return user list + */ + public Set getAllUsers() { + return this.getUsersManager().getUsers().keySet(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java index c2134eb..579c4c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java @@ -253,6 +253,15 @@ public Resource getUserResourceLimit() { public void setUserResourceLimit(Resource userResourceLimit) { this.userResourceLimit = userResourceLimit; } + + public String getUserName() { + return this.userName; + } + + @VisibleForTesting + public void setResourceUsage(ResourceUsage resourceUsage) { + this.userResourceUsage = resourceUsage; + } } /* End of User class */ /** @@ -344,7 +353,7 @@ public void userLimitNeedsRecompute() { /* * Get all users of queue. 
*/ - private Map getUsers() { + public Map getUsers() { return users; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java index a9e97fd..4fc0ea4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -96,6 +97,7 @@ Clock mClock = null; CapacitySchedulerConfiguration conf = null; CapacityScheduler cs = null; + @SuppressWarnings("rawtypes") EventHandler mDisp = null; ProportionalCapacityPreemptionPolicy policy = null; Resource clusterResource = null; @@ -247,6 +249,7 @@ public Integer answer(InvocationOnMock invocation) throws 
Throwable { if (containerId == 1) { when(rmc.isAMContainer()).thenReturn(true); when(app.getAMResource(label)).thenReturn(res); + when(app.getAppAMNodePartitionName()).thenReturn(label); } if (reserved) { @@ -280,6 +283,12 @@ public Integer answer(InvocationOnMock invocation) throws Throwable { containerId++; } + // If app has 0 container, and it has only pending, still make sure to + // update label. + if (repeat == 0) { + when(app.getAppAMNodePartitionName()).thenReturn(label); + } + // Some more app specific aggregated data can be better filled here. when(app.getPriority()).thenReturn(pri); when(app.getUser()).thenReturn(userName); @@ -315,10 +324,15 @@ public Integer answer(InvocationOnMock invocation) throws Throwable { private void mockApplications(String appsConfig) { int id = 1; HashMap> userMap = new HashMap>(); + HashMap>> userResourceUsagePerLabel = new HashMap<>(); LeafQueue queue = null; + int mulp = -1; for (String a : appsConfig.split(";")) { String[] strs = a.split("\t"); String queueName = strs[0]; + if (mulp <= 0 && strs.length > 2 && strs[2] != null) { + mulp = 100 / (new Integer(strs[2]).intValue()); + } // get containers List liveContainers = new ArrayList(); @@ -338,6 +352,7 @@ private void mockApplications(String appsConfig) { when(app.getReservedContainers()).thenReturn(reservedContainers); when(app.getApplicationAttemptId()).thenReturn(appAttemptId); when(app.getApplicationId()).thenReturn(appId); + when(app.getQueueName()).thenReturn(queueName); // add to LeafQueue queue = (LeafQueue) nameToCSQueues.get(queueName); @@ -349,23 +364,71 @@ private void mockApplications(String appsConfig) { users = new HashSet(); userMap.put(queueName, users); } - users.add(app.getUser()); + + String label = app.getAppAMNodePartitionName(); + + // Get label to queue + HashMap> userResourceUsagePerQueue = userResourceUsagePerLabel + .get(label); + if (null == userResourceUsagePerQueue) { + userResourceUsagePerQueue = new HashMap<>(); + 
userResourceUsagePerLabel.put(label, userResourceUsagePerQueue); + } + + // Get queue to user based resource map + HashMap userResourceUsage = userResourceUsagePerQueue + .get(queueName); + if (null == userResourceUsage) { + userResourceUsage = new HashMap<>(); + userResourceUsagePerQueue.put(queueName, userResourceUsage); + } + + // Get user to its resource usage. + ResourceUsage usage = userResourceUsage.get(app.getUser()); + if (null == usage) { + usage = new ResourceUsage(); + userResourceUsage.put(app.getUser(), usage); + } + + usage.incAMUsed(app.getAMResource(label)); + usage.incUsed(app.getAppAttemptResourceUsage().getUsed(label)); id++; } - for (String queueName : userMap.keySet()) { - queue = (LeafQueue) nameToCSQueues.get(queueName); - // Currently we have user-limit test support only for default label. - Resource totResoucePerPartition = partitionToResource.get(""); - Resource capacity = Resources.multiply(totResoucePerPartition, - queue.getQueueCapacities().getAbsoluteCapacity()); - HashSet users = userMap.get(queue.getQueueName()); - Resource userLimit = Resources.divideAndCeil(rc, capacity, users.size()); - for (String userName : users) { - when(queue.getResourceLimitForAllUsers(eq(userName), - any(Resource.class), anyString(), any(SchedulingMode.class))) - .thenReturn(userLimit); + for (String label : userResourceUsagePerLabel.keySet()) { + for (String queueName : userMap.keySet()) { + queue = (LeafQueue) nameToCSQueues.get(queueName); + // Currently we have user-limit test support only for default label. 
+ Resource totResoucePerPartition = partitionToResource.get(""); + Resource capacity = Resources.multiply(totResoucePerPartition, + queue.getQueueCapacities().getAbsoluteCapacity()); + HashSet users = userMap.get(queue.getQueueName()); + when(queue.getAllUsers()).thenReturn(users); + Resource userLimit; + if (mulp > 0) { + userLimit = Resources.divideAndCeil(rc, capacity, mulp); + } else { + userLimit = Resources.divideAndCeil(rc, capacity, + users.size()); + } + LOG.debug("Updating user-limit from mock: totResoucePerPartition=" + + totResoucePerPartition + ", capacity=" + capacity + + ", users.size()=" + users.size() + ", userlimit= " + userLimit + + ",label= " + label + ",queueName= " + queueName); + + HashMap userResourceUsage = userResourceUsagePerLabel + .get(label).get(queueName); + for (String userName : users) { + User user = new User(userName); + if (userResourceUsage != null) { + user.setResourceUsage(userResourceUsage.get(userName)); + } + when(queue.getUser(eq(userName))).thenReturn(user); + when(queue.getResourceLimitForAllUsers(eq(userName), + any(Resource.class), anyString(), any(SchedulingMode.class))) + .thenReturn(userLimit); + } } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java index bf83e1c..bdea3c0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java @@ -62,12 +62,16 @@ public void testSimpleIntraQueuePreemption() throws IOException { * Apps which are running at low priority (4) will preempt few of its * resources to meet the demand. */ + + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); + String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label "n1= res=100"; String queuesConfig = // guaranteed,max,used,pending,reserved - "root(=[100 100 80 120 0]);" + // root + "root(=[100 100 79 120 0]);" + // root "-a(=[11 100 11 50 0]);" + // a "-b(=[40 100 38 60 0]);" + // b "-c(=[20 100 10 10 0]);" + // c @@ -304,6 +308,8 @@ public void testLimitPreemptionWithMaxIntraQueuePreemptableLimit() conf.setFloat(CapacitySchedulerConfiguration. INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, (float) 0.5); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label @@ -357,6 +363,8 @@ public void testLimitPreemptionWithTotalPreemptedResourceAllowed() // report "ideal" preempt as 10%. Ensure preemption happens only for 10% conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND, (float) 0.1); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label @@ -411,6 +419,8 @@ public void testAlreadySelectedContainerFromInterQueuePreemption() conf.setFloat(CapacitySchedulerConfiguration. 
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, (float) 0.5); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label @@ -418,7 +428,7 @@ public void testAlreadySelectedContainerFromInterQueuePreemption() String queuesConfig = // guaranteed,max,used,pending,reserved "root(=[100 100 95 170 0]);" + // root - "-a(=[60 100 70 50 0]);" + // a + "-a(=[60 100 70 35 0]);" + // a "-b(=[40 100 25 120 0])"; // b String appsConfig = @@ -467,6 +477,8 @@ public void testSkipAMContainersInInterQueuePreemption() throws IOException { conf.setFloat(CapacitySchedulerConfiguration. INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, (float) 0.5); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label @@ -516,6 +528,8 @@ public void testSkipAMContainersInInterQueuePreemptionSingleApp() * cycle. Eventhough there are more demand and no other low priority * apps are present, still AM contaier need to soared. */ + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label @@ -660,6 +674,8 @@ public void testNodePartitionIntraQueuePreemption() throws IOException { conf.setFloat(CapacitySchedulerConfiguration. INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, (float) 0.5); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;" + // default partition "x=100,true"; // partition=x @@ -720,6 +736,8 @@ public void testComplexIntraQueuePreemption() throws IOException { conf.setFloat(CapacitySchedulerConfiguration. 
INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, (float) 0.5); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;"; String nodesConfig = // n1 has no label @@ -840,8 +858,10 @@ public void testIntraQueuePreemptionWithTwoUsers() policy.editSchedule(); // Considering user-limit of 50% since only 2 users are there, only preempt - // 15 more (5 is already running) eventhough demand is for 30. - verify(mDisp, times(15)).handle(argThat( + // 14 more (5 is already running) eventhough demand is for 30. Ideally we + // must preempt 15. But 15th container will bring user1's usage to 20 which + // is same as user-limit. Hence skip 15th container. + verify(mDisp, times(14)).handle(argThat( new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( getAppAttemptId(3)))); } @@ -869,6 +889,8 @@ public void testComplexNodePartitionIntraQueuePreemption() conf.setFloat(CapacitySchedulerConfiguration. INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, (float) 0.5); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); String labelsConfig = "=100,true;" + // default partition "x=100,true"; // partition=x diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java new file mode 100644 index 0000000..c7a5487 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java @@ -0,0 +1,899 @@ +/** + 
* Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +import static org.mockito.Matchers.argThat; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +/** + * Test class for IntraQueuePreemption scenarios. + */ +public class TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit + extends + ProportionalCapacityPreemptionPolicyMockFramework { + @Before + public void setup() { + super.setup(); + conf.setBoolean( + CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ENABLED, true); + policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock); + } + + @Test + public void testSimpleIntraQueuePreemptionWithTwoUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 100 | 0 | + * | app2 | user2 | 1 | 0 | 30 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. app1 of high priority + * has a demand of 0 and its already using 100. app2 from user2 has a demand + * of 30, and UL is 50. 30 would be preempted from app1. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 30 0]);" + // root + "-a(=[100 100 100 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,100,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,0,false,30,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // resources from app1. + verify(mDisp, times(30)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testNoIntraQueuePreemptionWithSingleUser() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 100 | 0 | + * | app2 | user1 | 1 | 0 | 30 | + * +--------------+----------+------+---------+ + * Given single user, lower priority/late submitted apps has to + * wait. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 30 0]);" + // root + "-a(=[100 100 100 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,100,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,0,false,30,user1)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource. Since app1,2 are from same user, there wont be + // any preemption. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testNoIntraQueuePreemptionWithTwoUserUnderUserLimit() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 30 | 30 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. app1 of high priority + * has a demand of 0 and it's already using 50. app2 from user2 has a demand + * of 30, and UL is 50. Since app1 is under UL, there should not be any + * preemption. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 30 0]);" + // root + "-a(=[100 100 80 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,50,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,30,false,30,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource, but app1 is within its user-limit. Hence there + // won't be any preemption. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testSimpleIntraQueuePreemptionWithTwoUsersWithAppPriority() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 2 | 100 | 0 | + * | app2 | user2 | 1 | 0 | 30 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. app1 of high priority + * has a demand of 0 and its already using 100. app2 from user2 has a demand + * of 30, and UL is 50. 30 would be preempted from app1. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 30 0]);" + // root + "-a(=[100 100 100 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(2,1,n1,,100,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,0,false,30,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // resources from app1 even though its priority is more than app2. + verify(mDisp, times(30)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testIntraQueuePreemptionOfUserLimitWithMultipleApps() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 30 | 30 | + * | app2 | user2 | 1 | 20 | 20 | + * | app3 | user1 | 1 | 30 | 30 | + * | app4 | user2 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 90 0]);" + // root + "-a(=[100 100 80 90 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,30,false,30,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user2);" + + "a\t" // app3 in a + + "(1,1,n1,,30,false,30,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app3 (compare to app1, app3 has low priority). + verify(mDisp, times(9)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(3)))); + } + + @Test + public void testNoPreemptionOfUserLimitWithMultipleAppsAndSameUser() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 30 | 30 | + * | app2 | user1 | 1 | 20 | 20 | + * | app3 | user1 | 1 | 30 | 30 | + * | app4 | user1 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 90 0]);" + // root + "-a(=[100 100 80 90 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,30,false,20,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user1);" + + "a\t" // app3 in a + + "(1,1,n1,,30,false,30,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user1)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app3 (compare to app1, app3 has low priority). 
+ verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(3)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(4)))); + } + + @Test + public void testIntraQueuePreemptionOfUserLimitWitAppsOfDifferentPriority() + throws IOException { + /** + * Queue structure is: + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 3 | 30 | 30 | + * | app2 | user2 | 1 | 20 | 20 | + * | app3 | user1 | 4 | 30 | 0 | + * | app4 | user2 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat( + CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 60 0]);" + // root + "-a(=[100 100 80 60 0])"; // b + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(3,1,n1,,30,false,30,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user2);" + "a\t" // app3 in a + + "(4,1,n1,,30,false,0,user1);" + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app1 (compare to app3, app1 has low priority). + verify(mDisp, times(9)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testIntraQueuePreemptionOfUserLimitInTwoQueues() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *      /   \
+     *     a     b
+     * 
+ * + * Guaranteed resource of a/b are 60:40. Total cluster resource = 100 + * maxIntraQueuePreemptableLimit by default is 50%. This test is to verify + * that intra-queue preemption could occur in two queues when user-limit + * irregularity is present in a queue. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 90 80 0]);" + // root + "-a(=[60 100 55 60 0]);" + // a + "-b(=[40 100 35 20 0])"; // b + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(3,1,n1,,20,false,30,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user2);" + + "a\t" // app3 in a + + "(4,1,n1,,15,false,0,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user2);" + + "b\t" // app5 in b + + "(3,1,n1,,25,false,10,user1);" + + "b\t" // app6 in b + + "(1,1,n1,,10,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and it's well under its user-limit. Hence + // preempt resources from app1 (compare to app3, app1 has low priority). + verify(mDisp, times(4)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(4)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(5)))); + } + + @Test + public void testIntraQueuePreemptionWithTwoRequestingUsers() + throws IOException { + /** + * Queue structure is: + * + *
+    *       root
+    *        |
+    *        a
+    * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 60 | 10 | + * | app2 | user2 | 1 | 40 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 20 0]);" + // root + "-a(=[100 100 100 20 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,60,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,40,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // resources from app1. + verify(mDisp, times(9)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionIfBelowUserLimitAndLowPriorityExtraUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 50 | 0 | + * | app3 | user3 | 0 | 0 | 10 | + * +--------------+----------+------+---------+ + * This scenario should never preempt from either user1 or user2 + */ + + // Set max preemption per round to 50% (this is different from minimum user + // limit percent). + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.7); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 10 0]);" + // root + "-a(=[100 100 100 10 0])"; // a + + String appsConfig = + // queueName\t\ + // (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP; + "a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1 + "a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2 + "a\t(0,1,n1,,0,false,10,user3)\t50"; // app3, user3 + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app1 (compare to app3, app1 has low priority). + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionIfBelowUserLimitAndSamePriorityExtraUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 50 | 0 | + * | app3 | user3 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * This scenario should never preempt from either user1 or user2 + */ + + // Set max preemption per round to 50% (this is different from minimum user + // limit percent). + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.7); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 10 0]);" + // root + "-a(=[100 100 100 10 0])"; // a + + String appsConfig = + // queueName\t\ + // (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP; + "a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1 + "a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2 + "a\t(1,1,n1,,0,false,10,user3)\t50"; // app3, user3 + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app1 (compare to app3, app1 has low priority). + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionIfBelowUserLimitAndHighPriorityExtraUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 50 | 0 | + * | app3 | user3 | 5 | 0 | 10 | + * +--------------+----------+------+---------+ + * This scenario should never preempt from either user1 or user2 + */ + + // Set max preemption per round to 50% (this is different from minimum user + // limit percent). + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.7); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 10 0]);" + // root + "-a(=[100 100 100 10 0])"; // a + + String appsConfig = + // queueName\t\ + // (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP; + "a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1 + "a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2 + "a\t(5,1,n1,,0,false,10,user3)\t50"; // app3, user3 + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app1 (compare to app3, app1 has low priority). + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionWithUserLimitDeadzone() + throws IOException { + /** + * Queue structure is: + * + *
+    *       root
+    *        |
+    *        a
+    * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 60 | 10 | + * | app2 | user2 | 1 | 40 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps but also ensure that user's + * usage not coming under its user-limit. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 20 0]);" + // root + "-a(=[100 100 100 20 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,3,n1,,20,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,40,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // 3 resources (9GB) from app1. We will not preempt last container as it may + // pull user's usage under its user-limit. + verify(mDisp, times(3)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testIntraQueuePreemptionWithUserLimitDeadzoneAndPriority() + throws IOException { + /** + * Queue structure is: + * + *
+    *       root
+    *        |
+    *        a
+    * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 60 | 10 | + * | app2 | user2 | 1 | 40 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps but also ensure that user's + * usage not coming under its user-limit. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 20 0]);" + // root + "-a(=[100 100 100 20 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,3,n1,,20,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(2,1,n1,,0,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,40,false,20,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // 3 resources (9GB) from app1. We will not preempt last container as it may + // pull user's usage under its user-limit. + verify(mDisp, times(3)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + + // After first round, 3 containers were preempted from app1 and resource + // distribution will be like below. 
+ appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,3,n1,,17,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(2,1,n1,,0,false,10,user1);" + // app2 a + "a\t" // app2 in a + + "(1,1,n1,,49,false,11,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 has priority demand within same user 'user1'. However user1's used + // is already under UL. Hence no preemption. We will still get 3 containers + // while asserting as it was already selected in earlier round. + verify(mDisp, times(3)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } +} -- 2.10.1 (Apple Git-78)