From 91b7e1bae8b0cda21d64a8795b5fcb9422fbfb0a Mon Sep 17 00:00:00 2001 From: Sunil G Date: Fri, 21 Apr 2017 16:47:51 +0530 Subject: [PATCH] YARN-2113 --- .../CapacitySchedulerPreemptionContext.java | 5 + .../capacity/CapacitySchedulerPreemptionUtils.java | 6 +- .../capacity/FifoIntraQueuePreemptionPlugin.java | 281 ++++--- .../capacity/IntraQueueCandidatesSelector.java | 118 ++- .../IntraQueuePreemptionComputePlugin.java | 7 +- .../ProportionalCapacityPreemptionPolicy.java | 19 +- .../monitor/capacity/TempAppPerPartition.java | 6 +- .../monitor/capacity/TempUserPerPartition.java | 88 +++ .../capacity/CapacitySchedulerConfiguration.java | 8 + .../scheduler/capacity/LeafQueue.java | 11 +- .../scheduler/capacity/UsersManager.java | 11 +- ...ionalCapacityPreemptionPolicyMockFramework.java | 88 ++- ...ortionalCapacityPreemptionPolicyIntraQueue.java | 4 +- ...apacityPreemptionPolicyIntraQueueUserLimit.java | 819 +++++++++++++++++++++ 14 files changed, 1328 insertions(+), 143 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java index 982b1f1..bc73a96 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionContext.java @@ -18,9 +18,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrder; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; @@ -63,4 +65,7 @@ TempQueuePerPartition getQueueByPartition(String queueName, float getMinimumThresholdForIntraQueuePreemption(); float getMaxAllowableLimitForIntraQueuePreemption(); + + @Unstable + IntraQueuePreemptionOrder getIntraQueuePreemptionOrder(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java index abad2a1..3e03d82 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/CapacitySchedulerPreemptionUtils.java @@ -99,7 +99,7 @@ public static void deductPreemptableResourcesBasedSelectedCandidates( } deductPreemptableResourcePerApp(context, tq.totalPartitionResource, - tas, res, partition); + tas, res); } } } @@ -108,10 +108,10 @@ public static void deductPreemptableResourcesBasedSelectedCandidates( private static void deductPreemptableResourcePerApp( CapacitySchedulerPreemptionContext context, Resource totalPartitionResource, Collection tas, - Resource res, String partition) { + Resource res) { for (TempAppPerPartition ta : tas) { ta.deductActuallyToBePreempted(context.getResourceCalculator(), - totalPartitionResource, res, partition); + totalPartitionResource, res); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 5f1af1e..a580151 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -18,11 +18,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import 
java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.PriorityQueue; import java.util.Set; @@ -33,7 +36,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.IntraQueueCandidatesSelector.TAPriorityComparator; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrder; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; @@ -61,6 +66,26 @@ public FifoIntraQueuePreemptionPlugin(ResourceCalculator rc, } @Override + public Collection getPreemptableApps(String queueName, + String partition) { + TempQueuePerPartition tq = context.getQueueByPartition(queueName, + partition); + + List apps = new ArrayList(); + for (TempAppPerPartition tmpApp : tq.getApps()) { + // If a lower priority app was not selected to get preempted, mark such + // apps out from preemption candidate selection. 
+ if (Resources.equals(tmpApp.getActuallyToBePreempted(), + Resources.none())) { + continue; + } + + apps.add(tmpApp.app); + } + return apps; + } + + @Override public Map getResourceDemandFromAppsPerQueue( String queueName, String partition) { @@ -90,7 +115,7 @@ public FifoIntraQueuePreemptionPlugin(ResourceCalculator rc, @Override public void computeAppsIdealAllocation(Resource clusterResource, - Resource partitionBasedResource, TempQueuePerPartition tq, + TempQueuePerPartition tq, Map> selectedCandidates, Resource totalPreemptedResourceAllowed, Resource queueReassignableResource, float maxAllowablePreemptLimit) { @@ -111,19 +136,22 @@ public void computeAppsIdealAllocation(Resource clusterResource, return; } + // This will hold a temp user data structure and will hold userlimit, + // idealAssigned, used etc to help calculation at each stage. + Map usersPerPartition = new LinkedHashMap<>(); + // 3. Create all tempApps for internal calculation and return a list from // high priority to low priority order. TAPriorityComparator taComparator = new TAPriorityComparator(); - PriorityQueue orderedByPriority = - createTempAppForResCalculation(tq.partition, apps, taComparator); + PriorityQueue orderedByPriority = createTempAppForResCalculation( + tq, apps, taComparator, clusterResource, usersPerPartition, perUserAMUsed); // 4. Calculate idealAssigned per app by checking based on queue's // unallocated resource.Also return apps arranged from lower priority to // higher priority. - TreeSet orderedApps = - calculateIdealAssignedResourcePerApp(clusterResource, - partitionBasedResource, tq, selectedCandidates, - queueReassignableResource, orderedByPriority, perUserAMUsed); + TreeSet orderedApps = calculateIdealAssignedResourcePerApp( + clusterResource, tq, selectedCandidates, queueReassignableResource, + orderedByPriority, usersPerPartition); // 5. A configurable limit that could define an ideal allowable preemption // limit. 
Based on current queue's capacity,defined how much % could become @@ -154,7 +182,8 @@ public void computeAppsIdealAllocation(Resource clusterResource, // 8. There are chances that we may preempt for the demand from same // priority level, such cases are to be validated out. validateOutSameAppPriorityFromDemand(clusterResource, - (TreeSet) tq.getApps()); + (TreeSet) orderedApps, usersPerPartition, + context.getIntraQueuePreemptionOrder()); if (LOG.isDebugEnabled()) { LOG.debug("Queue Name:" + tq.queueName + ", partition:" + tq.partition); @@ -222,32 +251,25 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, * } * * @param clusterResource Cluster Resource - * @param partitionBasedResource resource per partition * @param tq TempQueue * @param selectedCandidates Already Selected preemption candidates * @param queueReassignableResource Resource used in a queue * @param orderedByPriority List of running apps - * @param perUserAMUsed AM used resource + * @param usersPerPartition AM used resource * @return List of temp apps ordered from low to high priority */ private TreeSet calculateIdealAssignedResourcePerApp( - Resource clusterResource, Resource partitionBasedResource, - TempQueuePerPartition tq, + Resource clusterResource, TempQueuePerPartition tq, Map> selectedCandidates, Resource queueReassignableResource, PriorityQueue orderedByPriority, - Map perUserAMUsed) { + Map usersPerPartition) { Comparator reverseComp = Collections .reverseOrder(new TAPriorityComparator()); TreeSet orderedApps = new TreeSet<>(reverseComp); - - Map userIdealAssignedMapping = new HashMap<>(); String partition = tq.partition; - Map preCalculatedUserLimit = - new HashMap(); - while (!orderedByPriority.isEmpty()) { // Remove app from the next highest remaining priority and process it to // calculate idealAssigned per app. 
@@ -261,39 +283,13 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, } String userName = tmpApp.app.getUser(); - Resource userLimitResource = preCalculatedUserLimit.get(userName); - - // Verify whether we already calculated headroom for this user. - if (userLimitResource == null) { - userLimitResource = Resources.clone( - tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource, - partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); - - Resource amUsed = perUserAMUsed.get(userName); - if (null == amUsed) { - amUsed = Resources.createResource(0, 0); - } - - // Real AM used need not have to be considered for user-limit as well. - userLimitResource = Resources.subtract(userLimitResource, amUsed); - if (LOG.isDebugEnabled()) { - LOG.debug("Userlimit for user '" + userName + "' is :" - + userLimitResource + ", and amUsed is:" + amUsed); - } - - preCalculatedUserLimit.put(userName, userLimitResource); - } - - Resource idealAssignedForUser = userIdealAssignedMapping.get(userName); - - if (idealAssignedForUser == null) { - idealAssignedForUser = Resources.createResource(0, 0); - userIdealAssignedMapping.put(userName, idealAssignedForUser); - } + TempUserPerPartition tmpUser = usersPerPartition.get(userName); + Resource userLimitResource = tmpUser.getUserLimit(); + Resource idealAssignedForUser = tmpUser.idealAssigned; // Calculate total selected container resources from current app. - getAlreadySelectedPreemptionCandidatesResource(selectedCandidates, - tmpApp, partition); + getAlreadySelectedPreemptionCandidatesResource(selectedCandidates, tmpApp, + tmpUser, partition); // For any app, used+pending will give its idealAssigned. However it will // be tightly linked to queue's unallocated quota. 
So lower priority apps @@ -304,10 +300,11 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, if (Resources.lessThan(rc, clusterResource, idealAssignedForUser, userLimitResource)) { - appIdealAssigned = Resources.min(rc, clusterResource, appIdealAssigned, + Resource idealAssigned = Resources.min(rc, clusterResource, + appIdealAssigned, Resources.subtract(userLimitResource, idealAssignedForUser)); tmpApp.idealAssigned = Resources.clone(Resources.min(rc, - clusterResource, queueReassignableResource, appIdealAssigned)); + clusterResource, queueReassignableResource, idealAssigned)); Resources.addTo(idealAssignedForUser, tmpApp.idealAssigned); } else { continue; @@ -334,7 +331,8 @@ private void calculateToBePreemptedResourcePerApp(Resource clusterResource, */ private void getAlreadySelectedPreemptionCandidatesResource( Map> selectedCandidates, - TempAppPerPartition tmpApp, String partition) { + TempAppPerPartition tmpApp, TempUserPerPartition tmpUser, + String partition) { tmpApp.selected = Resources.createResource(0, 0); Set containers = selectedCandidates .get(tmpApp.app.getApplicationAttemptId()); @@ -346,16 +344,21 @@ private void getAlreadySelectedPreemptionCandidatesResource( for (RMContainer cont : containers) { if (partition.equals(cont.getNodeLabelExpression())) { Resources.addTo(tmpApp.selected, cont.getAllocatedResource()); + Resources.addTo(tmpUser.selected, cont.getAllocatedResource()); } } } private PriorityQueue createTempAppForResCalculation( - String partition, Collection apps, - TAPriorityComparator taComparator) { + TempQueuePerPartition tq, Collection apps, + TAPriorityComparator taComparator, Resource clusterResource, + Map usersPerPartition, + Map perUserAMUsed) { PriorityQueue orderedByPriority = new PriorityQueue<>( 100, taComparator); + String partition = tq.partition; + // have an internal temp app structure to store intermediate data(priority) for (FiCaSchedulerApp app : apps) { @@ -387,56 +390,155 @@ private void 
getAlreadySelectedPreemptionCandidatesResource( tmpApp.idealAssigned = Resources.createResource(0, 0); orderedByPriority.add(tmpApp); + + // Create a TempUserPerPartition structure to hold more information + // regarding each user's entities such as UserLimit etc. This could + // be kept in a user to TempUserPerPartition map for further reference. + String userName = app.getUser(); + if (!usersPerPartition.containsKey(userName)) { + ResourceUsage userResourceUsage = tq.leafQueue.getUser(userName) + .getResourceUsage(); + + TempUserPerPartition tmpUser = new TempUserPerPartition( + tq.leafQueue.getUser(userName), tq.queueName, + Resources.clone(userResourceUsage.getUsed(partition)), + Resources.clone(perUserAMUsed.get(userName)), + Resources.clone(userResourceUsage.getReserved(partition)), + Resources.none()); + + Resource userLimitResource = Resources.clone( + tq.leafQueue.getResourceLimitForAllUsers(userName, clusterResource, + partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); + + // Real AM used need not have to be considered for user-limit as well. + userLimitResource = Resources.subtract(userLimitResource, tmpUser.amUsed); + tmpUser.setUserLimit(userLimitResource); + + if (LOG.isDebugEnabled()) { + LOG.debug("TempUser:" + tmpUser); + } + + tmpUser.idealAssigned = Resources.createResource(0, 0); + usersPerPartition.put(userName, tmpUser); + } } return orderedByPriority; } /* * Fifo+Priority based preemption policy need not have to preempt resources at - * same priority level. Such cases will be validated out. + * same priority level. Such cases will be validated out. But if the demand is + * from an app of different user, force to preempt resources even if apps are + * at same priority. 
*/ public void validateOutSameAppPriorityFromDemand(Resource cluster, - TreeSet appsOrderedfromLowerPriority) { + TreeSet orderedApps, + Map usersPerPartition, + IntraQueuePreemptionOrder intraQueuePreemptionOrder ) { - TempAppPerPartition[] apps = appsOrderedfromLowerPriority - .toArray(new TempAppPerPartition[appsOrderedfromLowerPriority.size()]); + TempAppPerPartition[] apps = orderedApps + .toArray(new TempAppPerPartition[orderedApps.size()]); if (apps.length <= 0) { return; } - int lPriority = 0; - int hPriority = apps.length - 1; - - while (lPriority < hPriority - && !apps[lPriority].equals(apps[hPriority]) - && apps[lPriority].getPriority() < apps[hPriority].getPriority()) { - Resource toPreemptFromOther = apps[hPriority] - .getToBePreemptFromOther(); - Resource actuallyToPreempt = apps[lPriority].getActuallyToBePreempted(); - Resource delta = Resources.subtract(apps[lPriority].toBePreempted, - actuallyToPreempt); - - if (Resources.greaterThan(rc, cluster, delta, Resources.none())) { - Resource toPreempt = Resources.min(rc, cluster, - toPreemptFromOther, delta); - - apps[hPriority].setToBePreemptFromOther( - Resources.subtract(toPreemptFromOther, toPreempt)); - apps[lPriority].setActuallyToBePreempted( - Resources.add(actuallyToPreempt, toPreempt)); - } + for (int hPriority = apps.length - 1; hPriority >= 0; hPriority--) { - if (Resources.lessThanOrEqual(rc, cluster, - apps[lPriority].toBePreempted, - apps[lPriority].getActuallyToBePreempted())) { - lPriority++; - continue; - } + // Check whether high priority app with demand needs resource from other + // user. + if (Resources.greaterThan(rc, cluster, + apps[hPriority].getToBePreemptFromOther(), Resources.none())) { - if (Resources.equals(apps[hPriority].getToBePreemptFromOther(), - Resources.none())) { - hPriority--; - continue; + // Given we have a demand from a high priority app, we can do a reverse + // scan from lower priority apps to select resources. 
+ // Since idealAssigned of each app has considered user-limit, this logic + // will provide eventual consistency w.r.t user-limit as well. + for (int lPriority = 0; lPriority < apps.length; lPriority++) { + + // Check whether app with demand needs resource from other user. + if (Resources.greaterThan(rc, cluster, apps[lPriority].toBePreempted, + Resources.none())) { + + // If apps are of same user, and priority is same, then skip. + if ((apps[hPriority].getUser().equals(apps[lPriority].getUser())) + && (apps[lPriority].getPriority() >= apps[hPriority] + .getPriority())) { + continue; + } + + if (Resources.lessThanOrEqual(rc, cluster, + apps[lPriority].toBePreempted, + apps[lPriority].getActuallyToBePreempted()) + || Resources.equals(apps[hPriority].getToBePreemptFromOther(), + Resources.none())) { + continue; + } + + // Ideally if any application has a higher priority, then it can + // force to preempt any lower priority app from any user. However + // if admin enforces user-limit over priority, preemption module + // will not choose lower priority apps from usre's who are not yet + // met its user-limit. + TempUserPerPartition tmpUser = usersPerPartition + .get(apps[lPriority].getUser()); + if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser())) + && (tmpUser.isUserLimitReached(rc, cluster) == false) + && (intraQueuePreemptionOrder + .equals(IntraQueuePreemptionOrder.USERLIMIT_FIRST))) { + continue; + } + + Resource toPreemptFromOther = apps[hPriority] + .getToBePreemptFromOther(); + Resource actuallyToPreempt = apps[lPriority] + .getActuallyToBePreempted(); + + // A lower priority app could offer more resource to preempt, if + // multiple higher priority/under served users needs resources. + // After one iteration, we need to ensure that actuallyToPreempt is + // subtracted from the resource to preempt. 
+ Resource preemptableFromLowerPriorityApp = Resources + .subtract(apps[lPriority].toBePreempted, actuallyToPreempt); + + // In case of user-limit preemption, when app's are from different + // user and of same priority, we will do user-limit preemption if + // there is a demand from under UL quota app. + // However this under UL quota app's demand may be more. + // Still we should ensure that we are not doing over preemption such + // that only a maximum of (user's used - UL quota) could be + // preempted. + if ((!apps[hPriority].getUser().equals(apps[lPriority].getUser())) + && (apps[lPriority].getPriority() == apps[hPriority] + .getPriority()) + && (tmpUser.isUserLimitReached(rc, cluster) == true)) { + + Resource deltaULQuota = Resources + .subtract(tmpUser.getUsedDeductAM(), tmpUser.selected); + Resources.subtractFrom(deltaULQuota, tmpUser.getUserLimit()); + + if (tmpUser.isPreemptionQuotaForULDeltaDone()) { + deltaULQuota = Resources.createResource(0, 0); + } + + if (Resources.lessThan(rc, cluster, deltaULQuota, + preemptableFromLowerPriorityApp)) { + tmpUser.updatePreemptionQuotaForULDeltaAsDone(true); + preemptableFromLowerPriorityApp = deltaULQuota; + } + } + + if (Resources.greaterThan(rc, cluster, + preemptableFromLowerPriorityApp, Resources.none())) { + Resource toPreempt = Resources.min(rc, cluster, + toPreemptFromOther, preemptableFromLowerPriorityApp); + + apps[hPriority].setToBePreemptFromOther( + Resources.subtract(toPreemptFromOther, toPreempt)); + apps[lPriority].setActuallyToBePreempted( + Resources.add(actuallyToPreempt, toPreempt)); + } + } + } } } } @@ -456,6 +558,7 @@ private Resource calculateUsedAMResourcesPerQueue(String partition, Resources.addTo(userAMResource, app.getAMResource(partition)); Resources.addTo(amUsed, app.getAMResource(partition)); } + return amUsed; } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java index 2890414..a59839d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueueCandidatesSelector.java @@ -23,16 +23,19 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy.IntraQueuePreemptionOrder; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.util.resource.Resources; import java.io.Serializable; import java.util.ArrayList; +import java.util.Collection; import java.util.Comparator; -import java.util.Iterator; +import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -51,14 +54,14 @@ Comparator { @Override - public int compare(TempAppPerPartition tq1, TempAppPerPartition 
tq2) { - Priority p1 = Priority.newInstance(tq1.getPriority()); - Priority p2 = Priority.newInstance(tq2.getPriority()); + public int compare(TempAppPerPartition ta1, TempAppPerPartition ta2) { + Priority p1 = Priority.newInstance(ta1.getPriority()); + Priority p2 = Priority.newInstance(ta2.getPriority()); if (!p1.equals(p2)) { return p1.compareTo(p2); } - return tq1.getApplicationId().compareTo(tq2.getApplicationId()); + return ta1.getApplicationId().compareTo(ta2.getApplicationId()); } } @@ -121,17 +124,29 @@ public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) { Map resToObtainByPartition = fifoPreemptionComputePlugin .getResourceDemandFromAppsPerQueue(queueName, partition); - // 6. Based on the selected resource demand per partition, select + // Default preemption iterator considers only FIFO+priority. For + // userlimit preemption, its possible that some lower priority apps + // needs from high priority app of another user. Hence use apps + // ordered by userlimit starvation as well. + Collection apps = fifoPreemptionComputePlugin + .getPreemptableApps(queueName, partition); + + // 6. Get user-limit to ensure that we do not preempt resources which + // will force user's resource to come under its UL. + Map computedUserLimitPerUser = new HashMap<>(); + Map rollingResourceUsagePerUser = new HashMap<>(); + initializeUsageAndUserLimitForCompute(clusterResource, partition, + leafQueue, computedUserLimitPerUser, rollingResourceUsagePerUser); + + // 7. Based on the selected resource demand per partition, select // containers with known policy from inter-queue preemption. 
try { leafQueue.getReadLock().lock(); - Iterator desc = leafQueue.getOrderingPolicy() - .getPreemptionIterator(); - while (desc.hasNext()) { - FiCaSchedulerApp app = desc.next(); - preemptFromLeastStarvedApp(selectedCandidates, clusterResource, - totalPreemptedResourceAllowed, resToObtainByPartition, - leafQueue, app); + for (FiCaSchedulerApp app : apps) { + preemptFromLeastStarvedApp(leafQueue, app, selectedCandidates, + clusterResource, totalPreemptedResourceAllowed, + resToObtainByPartition, computedUserLimitPerUser, + rollingResourceUsagePerUser); } } finally { leafQueue.getReadLock().unlock(); @@ -142,11 +157,33 @@ public int compare(TempAppPerPartition tq1, TempAppPerPartition tq2) { return selectedCandidates; } - private void preemptFromLeastStarvedApp( + private void initializeUsageAndUserLimitForCompute(Resource clusterResource, + String partition, LeafQueue leafQueue, + Map computedUserLimitPerUser, + Map rollingResourceUsagePerUser) { + for (String user : leafQueue.getAllUsers()) { + Resource userLimitResource = Resources.clone( + leafQueue.getResourceLimitForAllUsers(user, clusterResource, + partition, SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); + computedUserLimitPerUser.put(user, userLimitResource); + + // Initialize used resource of a given user for rolling computation. + rollingResourceUsagePerUser.put(user, + leafQueue.getUser(user).getResourceUsage().getUsed(partition)); + if (LOG.isDebugEnabled()) { + LOG.debug("Rolling resource usage for user:" + user + " is : " + + rollingResourceUsagePerUser.get(user)); + } + } + } + + private void preemptFromLeastStarvedApp(LeafQueue leafQueue, + FiCaSchedulerApp app, Map> selectedCandidates, Resource clusterResource, Resource totalPreemptedResourceAllowed, - Map resToObtainByPartition, LeafQueue leafQueue, - FiCaSchedulerApp app) { + Map resToObtainByPartition, + Map computedUserLimitPerUser, + Map rollingResourceUsagePerUser) { // ToDo: Reuse reservation selector here. 
@@ -160,6 +197,7 @@ private void preemptFromLeastStarvedApp( + totalPreemptedResourceAllowed); } + Resource userResourceUsage = rollingResourceUsagePerUser.get(app.getUser()); for (RMContainer c : liveContainers) { // if there are no demand, return. @@ -184,12 +222,38 @@ private void preemptFromLeastStarvedApp( continue; } + // If selected container brings down resource usage under its user's + // UserLimit, we must skip such containers. + if (Resources.lessThan(rc, clusterResource, + Resources.subtract(userResourceUsage, c.getAllocatedResource()), + computedUserLimitPerUser.get(app.getUser())) + && preemptionContext.getIntraQueuePreemptionOrder() + .equals(IntraQueuePreemptionOrder.USERLIMIT_FIRST)) { + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Skipping container: " + c.getContainerId() + " with resource:" + + c.getAllocatedResource() + " as UserLimit for user:" + + app.getUser() + " with resource usage: " + userResourceUsage + + " is going under UL:" + + computedUserLimitPerUser.get(app.getUser())); + } + break; + } + // Try to preempt this container - CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain( - rc, preemptionContext, resToObtainByPartition, c, clusterResource, - selectedCandidates, totalPreemptedResourceAllowed); + boolean ret = CapacitySchedulerPreemptionUtils + .tryPreemptContainerAndDeductResToObtain(rc, preemptionContext, + resToObtainByPartition, c, clusterResource, selectedCandidates, + totalPreemptedResourceAllowed); + + // Subtract from respective user's resource usage once a container is + // selected for preemption. + if (ret && preemptionContext.getIntraQueuePreemptionOrder() + .equals(IntraQueuePreemptionOrder.USERLIMIT_FIRST)) { + Resources.subtractFrom(userResourceUsage, c.getAllocatedResource()); + } } - } private void computeIntraQueuePreemptionDemand(Resource clusterResource, @@ -205,12 +269,7 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource, continue; } - // 2. 
Its better to get partition based resource limit earlier before - // starting calculation - Resource partitionBasedResource = - context.getPartitionResource(partition); - - // 3. loop through all queues corresponding to a partition. + // 2. loop through all queues corresponding to a partition. for (String queueName : queueNames) { TempQueuePerPartition tq = context.getQueueByPartition(queueName, partition); @@ -221,12 +280,12 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource, continue; } - // 4. Consider reassignableResource as (used - actuallyToBePreempted). + // 3. Consider reassignableResource as (used - actuallyToBePreempted). // This provides as upper limit to split apps quota in a queue. Resource queueReassignableResource = Resources.subtract(tq.getUsed(), tq.getActuallyToBePreempted()); - // 5. Check queue's used capacity. Make sure that the used capacity is + // 4. Check queue's used capacity. Make sure that the used capacity is // above certain limit to consider for intra queue preemption. if (leafQueue.getQueueCapacities().getUsedCapacity(partition) < context .getMinimumThresholdForIntraQueuePreemption()) { @@ -236,8 +295,7 @@ private void computeIntraQueuePreemptionDemand(Resource clusterResource, // 6. 
compute the allocation of all apps based on queue's unallocated // capacity fifoPreemptionComputePlugin.computeAppsIdealAllocation(clusterResource, - partitionBasedResource, tq, selectedCandidates, - totalPreemptedResourceAllowed, + tq, selectedCandidates, totalPreemptedResourceAllowed, queueReassignableResource, context.getMaxAllowableLimitForIntraQueuePreemption()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java index 93ebe65..edfbb2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/IntraQueuePreemptionComputePlugin.java @@ -18,12 +18,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; +import java.util.Collection; import java.util.Map; import java.util.Set; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; interface IntraQueuePreemptionComputePlugin { @@ -32,8 +34,11 @@ String partition); void computeAppsIdealAllocation(Resource clusterResource, - Resource partitionBasedResource, TempQueuePerPartition tq, + TempQueuePerPartition tq, Map> selectedCandidates, Resource totalPreemptedResourceAllowed, Resource 
queueTotalUnassigned, float maxAllowablePreemptLimit); + + Collection getPreemptableApps(String queueName, + String partition); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java index 3bf6994..770b87d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java @@ -22,6 +22,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -80,6 +81,12 @@ */ public class ProportionalCapacityPreemptionPolicy implements SchedulingEditPolicy, CapacitySchedulerPreemptionContext { + + @Unstable + public enum IntraQueuePreemptionOrder { + PRIORITY_FIRST, USERLIMIT_FIRST; + } + private static final Log LOG = LogFactory.getLog(ProportionalCapacityPreemptionPolicy.class); @@ -96,6 +103,7 @@ private float maxAllowableLimitForIntraQueuePreemption; private float minimumThresholdForIntraQueuePreemption; + private IntraQueuePreemptionOrder intraQueuePreemptionOrder; // Pointer to other RM components private RMContext rmContext; @@ -191,6 +199,11 @@ 
public void init(Configuration config, RMContext context, CapacitySchedulerConfiguration. DEFAULT_INTRAQUEUE_PREEMPTION_MINIMUM_THRESHOLD); + intraQueuePreemptionOrder = IntraQueuePreemptionOrder.valueOf(csConfig + .get(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + CapacitySchedulerConfiguration.DEFAULT_INTRAQUEUE_PREEMPTION_ORDER) + .toUpperCase()); + rc = scheduler.getResourceCalculator(); nlm = scheduler.getRMContext().getNodeLabelManager(); @@ -243,7 +256,6 @@ public synchronized void editSchedule() { } } - @SuppressWarnings("unchecked") private void preemptOrkillSelectedContainerAfterWait( Map> selectedCandidates, long currentTime) { @@ -657,4 +669,9 @@ public void addPartitionToUnderServedQueues(String queueName, } underServedQueues.add(queueName); } + + @Override + public IntraQueuePreemptionOrder getIntraQueuePreemptionOrder() { + return intraQueuePreemptionOrder; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java index fccd2a7..e9a934b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempAppPerPartition.java @@ -91,8 +91,12 @@ public ApplicationId getApplicationId() { return applicationId; } + public String getUser() { + return this.app.getUser(); + } + public void deductActuallyToBePreempted(ResourceCalculator resourceCalculator, - Resource cluster, Resource toBeDeduct, String 
partition) { + Resource cluster, Resource toBeDeduct) { if (Resources.greaterThan(resourceCalculator, cluster, getActuallyToBePreempted(), toBeDeduct)) { Resources.subtractFrom(getActuallyToBePreempted(), toBeDeduct); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java new file mode 100644 index 0000000..33ee18f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempUserPerPartition.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; + + +/** + * Temporary data-structure tracking resource availability, pending resource + * need, current utilization for an application. + */ +public class TempUserPerPartition extends AbstractPreemptionEntity { + + private final User user; + private Resource userLimit; + private boolean donePreemptionQuotaForULDelta = false; + + TempUserPerPartition(User user, String queueName, Resource usedPerPartition, + Resource amUsedPerPartition, Resource reserved, + Resource pendingPerPartition) { + super(queueName, usedPerPartition, amUsedPerPartition, reserved, + pendingPerPartition); + this.user = user; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(" NAME: " + getUserName()).append(" CUR: ").append(getUsed()) + .append(" PEN: ").append(pending).append(" RESERVED: ").append(reserved) + .append(" AM_USED: ").append(amUsed).append(" USER_LIMIT: ") + .append(getUserLimit()).append(" IDEAL_ASSIGNED: ") + .append(idealAssigned).append(" USED_WO_AMUSED: ") + .append(getUsedDeductAM()).append(" IDEAL_PREEMPT: ") + .append(toBePreempted).append(" ACTUAL_PREEMPT: ") + .append(getActuallyToBePreempted()).append("\n"); + + return sb.toString(); + } + + public String getUserName() { + return user.getUserName(); + } + + public Resource getUserLimit() { + return userLimit; + } + + public void setUserLimit(Resource userLimitResource) { + this.userLimit = userLimitResource; + } + + public boolean isUserLimitReached(ResourceCalculator rc, + Resource clusterResource) { + if (Resources.greaterThan(rc, clusterResource, getUsedDeductAM(), + userLimit)) { + return true; + } + return false; + } + + 
public boolean isPreemptionQuotaForULDeltaDone() { + return this.donePreemptionQuotaForULDelta; + } + + public void updatePreemptionQuotaForULDeltaAsDone(boolean done) { + this.donePreemptionQuotaForULDelta = done; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index 9fb92ec..24efedb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -1234,6 +1234,14 @@ public boolean getLazyPreemptionEnabled() { 0.2f; /** + * For intra-queue preemption, enforce a preemption order such as + * "userlimit_first" or "priority_first". + */ + public static final String INTRAQUEUE_PREEMPTION_ORDER = PREEMPTION_CONFIG_PREFIX + + INTRA_QUEUE_PREEMPTION_CONFIG_PREFIX + "preemption-order"; + public static final String DEFAULT_INTRAQUEUE_PREEMPTION_ORDER = "userlimit_first"; + + /** * Maximum application for a queue to be used when application per queue is * not defined.To be consistent with previous version the default value is set * as UNDEFINED. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 9059ef0..104e95e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -43,12 +43,10 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.security.AccessType; -import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; @@ -56,7 +54,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.*; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; @@ -2022,4 +2019,12 @@ public void stopQueue() { writeLock.unlock(); } } + + /** + * Get all valid users in this queue. + * @return user list + */ + public Set getAllUsers() { + return this.getUsersManager().getUsers().keySet(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java index c2134eb..579c4c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/UsersManager.java @@ -253,6 +253,15 @@ public Resource getUserResourceLimit() { public void setUserResourceLimit(Resource userResourceLimit) { this.userResourceLimit = userResourceLimit; } + + public String getUserName() { + return this.userName; + } + + @VisibleForTesting + public void setResourceUsage(ResourceUsage resourceUsage) { + this.userResourceUsage = resourceUsage; + } } /* End of User class */ /** @@ -344,7 +353,7 @@ public void userLimitNeedsRecompute() { /* * Get all users of queue. 
*/ - private Map getUsers() { + public Map getUsers() { return users; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java index a9e97fd..70a8626 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicyMockFramework.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -96,6 +97,7 @@ Clock mClock = null; CapacitySchedulerConfiguration conf = null; CapacityScheduler cs = null; + @SuppressWarnings("rawtypes") EventHandler mDisp = null; ProportionalCapacityPreemptionPolicy policy = null; Resource clusterResource = null; @@ -247,6 +249,7 @@ public Integer answer(InvocationOnMock invocation) throws 
Throwable { if (containerId == 1) { when(rmc.isAMContainer()).thenReturn(true); when(app.getAMResource(label)).thenReturn(res); + when(app.getAppAMNodePartitionName()).thenReturn(label); } if (reserved) { @@ -280,6 +283,12 @@ public Integer answer(InvocationOnMock invocation) throws Throwable { containerId++; } + // If app has 0 container, and it has only pending, still make sure to + // update label. + if (repeat == 0) { + when(app.getAppAMNodePartitionName()).thenReturn(label); + } + // Some more app specific aggregated data can be better filled here. when(app.getPriority()).thenReturn(pri); when(app.getUser()).thenReturn(userName); @@ -315,10 +324,15 @@ public Integer answer(InvocationOnMock invocation) throws Throwable { private void mockApplications(String appsConfig) { int id = 1; HashMap> userMap = new HashMap>(); + HashMap>> userResourceUsagePerLabel = new HashMap<>(); LeafQueue queue = null; + int mulp = -1; for (String a : appsConfig.split(";")) { String[] strs = a.split("\t"); String queueName = strs[0]; + if (mulp <= 0 && strs.length > 2 && strs[2] != null) { + mulp = 100 / (new Integer(strs[2]).intValue()); + } // get containers List liveContainers = new ArrayList(); @@ -349,23 +363,71 @@ private void mockApplications(String appsConfig) { users = new HashSet(); userMap.put(queueName, users); } - users.add(app.getUser()); + + String label = app.getAppAMNodePartitionName(); + + // Get label to queue + HashMap> userResourceUsagePerQueue = userResourceUsagePerLabel + .get(label); + if (null == userResourceUsagePerQueue) { + userResourceUsagePerQueue = new HashMap<>(); + userResourceUsagePerLabel.put(label, userResourceUsagePerQueue); + } + + // Get queue to user based resource map + HashMap userResourceUsage = userResourceUsagePerQueue + .get(queueName); + if (null == userResourceUsage) { + userResourceUsage = new HashMap<>(); + userResourceUsagePerQueue.put(queueName, userResourceUsage); + } + + // Get user to its resource usage. 
+ ResourceUsage usage = userResourceUsage.get(app.getUser()); + if (null == usage) { + usage = new ResourceUsage(); + userResourceUsage.put(app.getUser(), usage); + } + + usage.incAMUsed(app.getAMResource(label)); + usage.incUsed(app.getAppAttemptResourceUsage().getUsed(label)); id++; } - for (String queueName : userMap.keySet()) { - queue = (LeafQueue) nameToCSQueues.get(queueName); - // Currently we have user-limit test support only for default label. - Resource totResoucePerPartition = partitionToResource.get(""); - Resource capacity = Resources.multiply(totResoucePerPartition, - queue.getQueueCapacities().getAbsoluteCapacity()); - HashSet users = userMap.get(queue.getQueueName()); - Resource userLimit = Resources.divideAndCeil(rc, capacity, users.size()); - for (String userName : users) { - when(queue.getResourceLimitForAllUsers(eq(userName), - any(Resource.class), anyString(), any(SchedulingMode.class))) - .thenReturn(userLimit); + for (String label : userResourceUsagePerLabel.keySet()) { + for (String queueName : userMap.keySet()) { + queue = (LeafQueue) nameToCSQueues.get(queueName); + // Currently we have user-limit test support only for default label. 
+ Resource totResoucePerPartition = partitionToResource.get(""); + Resource capacity = Resources.multiply(totResoucePerPartition, + queue.getQueueCapacities().getAbsoluteCapacity()); + HashSet users = userMap.get(queue.getQueueName()); + when(queue.getAllUsers()).thenReturn(users); + Resource userLimit; + if (mulp > 0) { + userLimit = Resources.divideAndCeil(rc, capacity, mulp); + } else { + userLimit = Resources.divideAndCeil(rc, capacity, + users.size()); + } + LOG.debug("Updating user-limit from mock: totResoucePerPartition=" + + totResoucePerPartition + ", capacity=" + capacity + + ", users.size()=" + users.size() + ", userlimit= " + userLimit + + ",label= " + label + ",queueName= " + queueName); + + HashMap userResourceUsage = userResourceUsagePerLabel + .get(label).get(queueName); + for (String userName : users) { + User user = new User(userName); + if (userResourceUsage != null) { + user.setResourceUsage(userResourceUsage.get(userName)); + } + when(queue.getUser(eq(userName))).thenReturn(user); + when(queue.getResourceLimitForAllUsers(eq(userName), + any(Resource.class), anyString(), any(SchedulingMode.class))) + .thenReturn(userLimit); + } } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java index bf83e1c..490f4b5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueue.java @@ -41,6 +41,8 @@ public void setup() { super.setup(); conf.setBoolean( CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ENABLED, true); + conf.set(CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ORDER, + "priority_first"); policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock); } @@ -418,7 +420,7 @@ public void testAlreadySelectedContainerFromInterQueuePreemption() String queuesConfig = // guaranteed,max,used,pending,reserved "root(=[100 100 95 170 0]);" + // root - "-a(=[60 100 70 50 0]);" + // a + "-a(=[60 100 70 35 0]);" + // a "-b(=[40 100 25 120 0])"; // b String appsConfig = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java new file mode 100644 index 0000000..76ce130 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit.java @@ -0,0 +1,819 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; + +import static org.mockito.Matchers.argThat; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +/** + * Test class for IntraQueuePreemption scenarios. + */ +public class TestProportionalCapacityPreemptionPolicyIntraQueueUserLimit + extends + ProportionalCapacityPreemptionPolicyMockFramework { + @Before + public void setup() { + super.setup(); + conf.setBoolean( + CapacitySchedulerConfiguration.INTRAQUEUE_PREEMPTION_ENABLED, true); + policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock); + } + + @Test + public void testSimpleIntraQueuePreemptionWithTwoUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 100 | 0 | + * | app2 | user2 | 1 | 0 | 30 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. app1 of high priority + * has a demand of 0 and its already using 100. app2 from user2 has a demand + * of 30, and UL is 50. 30 would be preempted from app1. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 30 0]);" + // root + "-a(=[100 100 100 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,100,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,0,false,30,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // resources from app1. + verify(mDisp, times(30)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testNoIntraQueuePreemptionWithSingleUser() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 100 | 0 | + * | app2 | user1 | 1 | 0 | 30 | + * +--------------+----------+------+---------+ + * Given single user, lower priority/late submitted apps has to + * wait. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 30 0]);" + // root + "-a(=[100 100 100 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,100,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,0,false,30,user1)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource. Since app1,2 are from same user, there wont be + // any preemption. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testNoIntraQueuePreemptionWithTwoUserUnderUserLimit() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 30 | 30 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. app1 of high priority + * has a demand of 0 and its already using 50. app2 from user2 has a demand + * of 30, and UL is 50. Since app1 is under UL, there should not be any + * preemption. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 30 0]);" + // root + "-a(=[100 100 80 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,50,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,30,false,30,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource. Since app1,2 are from same user, there wont be + // any preemption. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testSimpleIntraQueuePreemptionWithTwoUsersWithAppPriority() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 2 | 100 | 0 | + * | app2 | user2 | 1 | 0 | 30 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. app1 of high priority + * has a demand of 0 and its already using 100. app2 from user2 has a demand + * of 30, and UL is 50. 30 would be preempted from app1. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 30 0]);" + // root + "-a(=[100 100 100 30 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(2,1,n1,,100,false,0,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,0,false,30,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // resources from app1 even though its priority is more than app2. + verify(mDisp, times(30)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testIntraQueuePreemptionOfUserLimitWithMultipleApps() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 30 | 30 | + * | app2 | user2 | 1 | 20 | 20 | + * | app3 | user1 | 1 | 30 | 30 | + * | app4 | user2 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 90 0]);" + // root + "-a(=[100 100 80 90 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,30,false,30,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user2);" + + "a\t" // app3 in a + + "(1,1,n1,,30,false,30,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app3 (compare to app1, app3 has low priority). + verify(mDisp, times(10)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(3)))); + } + + @Test + public void testNoPreemptionOfUserLimitWithMultipleAppsAndSameUser() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 30 | 30 | + * | app2 | user1 | 1 | 20 | 20 | + * | app3 | user1 | 1 | 30 | 30 | + * | app4 | user1 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 90 0]);" + // root + "-a(=[100 100 80 90 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,30,false,20,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user1);" + + "a\t" // app3 in a + + "(1,1,n1,,30,false,30,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user1)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and its well under its user-limit. Hence + // preempt resources from app3 (compare to app1, app3 has low priority). 
+ verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(3)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(4)))); + } + + @Test + public void testIntraQueuePreemptionOfUserLimitWitAppsOfDifferentPriority() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 3 | 30 | 30 | + * | app2 | user2 | 1 | 20 | 20 | + * | app3 | user1 | 4 | 30 | 0 | + * | app4 | user2 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 80 60 0]);" + // root + "-a(=[100 100 80 60 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(3,1,n1,,30,false,30,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user2);" + + "a\t" // app3 in a + + "(4,1,n1,,30,false,0,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2/app4 needs more resource and it's well under its user-limit. Hence + // preempt resources from app1 (compared to app3, app1 has lower priority). + verify(mDisp, times(10)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + } + + @Test + public void testIntraQueuePreemptionOfUserLimitInTwoQueues() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *      /   \
+     *     a     b
+     * 
+ * + * Guaranteed resource of a/b are 60:40. Total cluster resource = 100 + * maxIntraQueuePreemptableLimit by default is 50%. This test is to verify + * that intra-queue preemption could occur in two queues when user-limit + * irregularity is present in queue. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 90 80 0]);" + // root + "-a(=[60 100 55 60 0]);" + // a + "-b(=[40 100 35 20 0])" ; // b + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(3,1,n1,,20,false,30,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,20,false,20,user2);" + + "a\t" // app3 in a + + "(4,1,n1,,15,false,0,user1);" + + "a\t" // app4 in a + + "(1,1,n1,,0,false,10,user2);" + + "b\t" // app5 in b + + "(3,1,n1,,25,false,10,user1);" + + "b\t" // app6 in b + + "(1,1,n1,,10,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // user2's apps need more resource and are well under the user-limit. Hence + // preempt resources from app1 in queue a and app5 in queue b (the + // lower-priority apps of user1 in each queue). + verify(mDisp, times(5)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(5)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(5)))); + } + + @Test + public void testIntraQueuePreemptionWithTwoRequestingUsers() + throws IOException { + /** + * Queue structure is: + * + *
+    *       root
+    *        |
+    *        a
+    * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 60 | 10 | + * | app2 | user2 | 1 | 40 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 20 0]);" + // root + "-a(=[100 100 100 20 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,1,n1,,60,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,40,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // resources from app1. + verify(mDisp, times(10)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionIfBelowUserLimitAndLowPriorityExtraUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 50 | 0 | + * | app3 | user3 | 0 | 0 | 10 | + * +--------------+----------+------+---------+ + * This scenario should never preempt from either user1 or user2 + */ + + // Set max preemption per round to 70% (this is different from minimum user + // limit percent). + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.7); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 10 0]);" + // root + "-a(=[100 100 100 10 0])"; // a + + String appsConfig = + // queueName\t\ + // (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP; + "a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1 + "a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2 + "a\t(0,1,n1,,0,false,10,user3)\t50"; // app3, user3 + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // user1 and user2 are each exactly at the user limit (50), so user3's + // pending ask must not trigger preemption from either of them. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionIfBelowUserLimitAndSamePriorityExtraUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 50 | 0 | + * | app3 | user3 | 1 | 0 | 10 | + * +--------------+----------+------+---------+ + * This scenario should never preempt from either user1 or user2 + */ + + // Set max preemption per round to 70% (this is different from minimum user + // limit percent). + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.7); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 10 0]);" + // root + "-a(=[100 100 100 10 0])"; // a + + String appsConfig = + // queueName\t\ + // (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP; + "a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1 + "a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2 + "a\t(1,1,n1,,0,false,10,user3)\t50"; // app3, user3 + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // user1 and user2 are each exactly at the user limit (50), so user3's + // pending ask must not trigger preemption from either of them. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionIfBelowUserLimitAndHighPriorityExtraUsers() + throws IOException { + /** + * Queue structure is: + * + *
+     *       root
+     *        |
+     *        a
+     * 
+ * + * Scenario: + * Preconditions: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 50 | 0 | + * | app2 | user2 | 1 | 50 | 0 | + * | app3 | user3 | 5 | 0 | 10 | + * +--------------+----------+------+---------+ + * This scenario should never preempt from either user1 or user2 + */ + + // Set max preemption per round to 70% (this is different from minimum user + // limit percent). + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.7); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 10 0]);" + // root + "-a(=[100 100 100 10 0])"; // a + + String appsConfig = + // queueName\t\ + // (priority,resource,host,label,#repeat,reserved,pending,user)\tMULP; + "a\t(1,1,n1,,50,false,0,user1)\t50;" + // app1, user1 + "a\t(1,1,n1,,50,false,0,user2)\t50;" + // app2, user2 + "a\t(5,1,n1,,0,false,10,user3)\t50"; // app3, user3 + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // user1 and user2 are each exactly at the user limit (50), so even user3's + // high-priority pending ask must not trigger preemption from either user. + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } + + @Test + public void testNoIntraQueuePreemptionWithUserLimitDeadzone() + throws IOException { + /** + * Queue structure is: + * + *
+    *       root
+    *        |
+    *        a
+    * 
+ * + * Scenario: + * Queue total resources: 100 + * Minimum user limit percent: 50% + * +--------------+----------+------+---------+ + * | APP | USER | PRIORITY | USED | PENDING | + * +--------------+----------+------+---------+ + * | app1 | user1 | 1 | 60 | 10 | + * | app2 | user2 | 1 | 40 | 10 | + * +--------------+----------+------+---------+ + * Hence in queueA of 100, each user has a quota of 50. Now have multiple + * apps and check for preemption across apps but also ensure that user's + * usage not coming under its user-limit. + */ + + // Set max preemption limit as 50%. + conf.setFloat(CapacitySchedulerConfiguration. + INTRAQUEUE_PREEMPTION_MAX_ALLOWABLE_LIMIT, + (float) 0.5); + + String labelsConfig = "=100,true;"; + String nodesConfig = // n1 has no label + "n1= res=100"; + String queuesConfig = + // guaranteed,max,used,pending,reserved + "root(=[100 100 100 20 0]);" + // root + "-a(=[100 100 100 20 0])"; // a + + String appsConfig = + // queueName\t(priority,resource,host,expression,#repeat,reserved,pending) + "a\t" // app1 in a + + "(1,3,n1,,20,false,10,user1);" + // app1 a + "a\t" // app2 in a + + "(1,1,n1,,40,false,10,user2)"; + + buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig); + policy.editSchedule(); + + // app2 needs more resource and its well under its user-limit. Hence preempt + // 3 resources (9GB) from app1. We will not preempt last container as it may + // pull user's usage under its user-limit. + verify(mDisp, times(3)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(1)))); + verify(mDisp, times(0)).handle(argThat( + new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor( + getAppAttemptId(2)))); + } +} -- 2.10.1 (Apple Git-78)