diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java index 09edb987876..d1cc8501a92 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/SchedulingMonitor.java @@ -79,7 +79,7 @@ public Thread newThread(Runnable r) { } private void schedulePreemptionChecker() { - handler = ses.scheduleAtFixedRate(new PreemptionChecker(), + handler = ses.scheduleAtFixedRate(new PolicyInvoker(), 0, monitorInterval, TimeUnit.MILLISECONDS); } @@ -99,7 +99,7 @@ public void invokePolicy(){ scheduleEditPolicy.editSchedule(); } - private class PreemptionChecker implements Runnable { + private class PolicyInvoker implements Runnable { @Override public void run() { try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java index 8327cb906e4..304d204c949 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java @@ -37,6 +37,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .ManagedParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptableQueue; @@ -377,7 +380,9 @@ private void cleanupStaledPreemptionCandidates(long currentTime) { } private Set getLeafQueueNames(TempQueuePerPartition q) { - if (q.children == null || q.children.isEmpty()) { + // A ManagedParentQueue may have no children, yet it is not a leaf queue + if ((q.children == null || q.children.isEmpty()) + && !(q.parentQueue instanceof ManagedParentQueue)) { return ImmutableSet.of(q.queueName); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java index 4d71223aaa3..fdeee5273f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TempQueuePerPartition.java @@ -21,6 +21,9 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .ParentQueue; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.Resources; @@ -56,6 +59,7 @@ final ArrayList children; private Collection apps; LeafQueue leafQueue; + ParentQueue parentQueue; boolean preemptionDisabled; protected Resource pendingDeductReserved; @@ -90,6 +94,10 @@ pendingDeductReserved = Resources.createResource(0); } + if (ParentQueue.class.isAssignableFrom(queue.getClass())) { + parentQueue = (ParentQueue) queue; + } + this.normalizedGuarantee = new double[ResourceUtils .getNumberOfKnownResourceTypes()]; this.children = new ArrayList<>(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacitySchedulerPlanFollower.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacitySchedulerPlanFollower.java index 2e166890402..7962d8e30f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacitySchedulerPlanFollower.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/reservation/CapacitySchedulerPlanFollower.java @@ -28,10 +28,12 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AutoCreatedLeafQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.PlanQueue; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .ReservationQueue; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.resource.Resources; import org.slf4j.Logger; @@ -92,8 +94,8 @@ protected void addReservationQueue( String planQueueName, Queue queue, String currResId) { PlanQueue planQueue = (PlanQueue)queue; try { - AutoCreatedLeafQueue resQueue = - new AutoCreatedLeafQueue(cs, currResId, planQueue); + ReservationQueue resQueue = + new ReservationQueue(cs, currResId, planQueue); cs.addQueue(resQueue); } catch (SchedulerDynamicEditException e) { LOG.warn( @@ -112,8 +114,8 @@ protected void createDefaultReservationQueue( PlanQueue planQueue = (PlanQueue)queue; if (cs.getQueue(defReservationId) == null) { try { - AutoCreatedLeafQueue defQueue = - new AutoCreatedLeafQueue(cs, defReservationId, planQueue); + ReservationQueue defQueue = + new ReservationQueue(cs, defReservationId, planQueue); cs.addQueue(defQueue); } catch (SchedulerDynamicEditException e) { LOG.warn( diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index d94efb1150f..cf5e13bf945 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -457,7 +457,7 @@ public void removeQueue(String queueName) throws YarnException { } @Override - public void addQueue(Queue newQueue) throws YarnException { + public void addQueue(Queue newQueue) throws YarnException, IOException { throw new YarnException(getClass().getSimpleName() + " does not support this operation"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 111998bae6f..93ca7c2ea18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -272,7 +272,7 @@ public void preValidateMoveApplication(ApplicationId appId, * @param newQueue the queue being added. 
* @throws YarnException */ - void addQueue(Queue newQueue) throws YarnException; + void addQueue(Queue newQueue) throws YarnException, IOException; /** * This method increase the entitlement for current queue (must respect diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractAutoCreatedLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractAutoCreatedLeafQueue.java new file mode 100644 index 00000000000..ac97d722afe --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractAutoCreatedLeafQueue.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .SchedulerDynamicEditException; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common + .QueueEntitlement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +import static org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager + .NO_LABEL; + +/** + * Abstract class for dynamic auto created queues managed by an implementation + * of AbstractManagedParentQueue. + */ +public class AbstractAutoCreatedLeafQueue extends LeafQueue { + + protected AbstractManagedParentQueue parent; + + public AbstractAutoCreatedLeafQueue(CapacitySchedulerContext cs, + String queueName, AbstractManagedParentQueue parent, CSQueue old) + throws IOException { + super(cs, queueName, parent, old); + this.parent = parent; + } + + private static final Logger LOG = LoggerFactory.getLogger( + AbstractAutoCreatedLeafQueue.class); + + public AbstractAutoCreatedLeafQueue(CapacitySchedulerContext cs, + CapacitySchedulerConfiguration leafQueueConfigs, String queueName, + AbstractManagedParentQueue parent, CSQueue old) throws IOException { + super(cs, leafQueueConfigs, queueName, parent, old); + this.parent = parent; + } + + /** + * Changes the capacity of this queue for the default (NO_LABEL) partition + * and adjusts its absoluteCapacity. + * + * @param entitlement the new entitlement for the queue (capacity, + * maxCapacity, etc.) + * @throws SchedulerDynamicEditException if the capacity is not in [0,1] + */ + public void setEntitlement(QueueEntitlement entitlement) + throws SchedulerDynamicEditException { + setEntitlement(NO_LABEL, entitlement); + } + + /** + * Changes the capacity of this queue for the given node label and adjusts + * its absoluteCapacity. + * + * @param entitlement the new entitlement for the queue (capacity, + * maxCapacity, etc.) 
+ * @throws SchedulerDynamicEditException if the capacity is not in [0,1] + */ + public void setEntitlement(String nodeLabel, QueueEntitlement entitlement) + throws SchedulerDynamicEditException { + try { + writeLock.lock(); + float capacity = entitlement.getCapacity(); + if (capacity < 0 || capacity > 1.0f) { + throw new SchedulerDynamicEditException( + "Capacity demand is not in the [0,1] range: " + capacity); + } + setCapacity(nodeLabel, capacity); + setAbsoluteCapacity(nodeLabel, + getParent().getQueueCapacities(). + getAbsoluteCapacity(nodeLabel) + * getQueueCapacities().getCapacity(nodeLabel)); + // note: maxCapacity is taken from the entitlement's maxCapacity, + // not from the capacity set above + setMaxCapacity(nodeLabel, entitlement.getMaxCapacity()); + if (LOG.isDebugEnabled()) { + LOG.debug("successfully changed to " + capacity + " for queue " + this + .getQueueName()); + } + + //update queue used capacity etc + CSQueueUtils.updateQueueStatistics(resourceCalculator, + csContext.getClusterResource(), + this, labelManager, nodeLabel); + } finally { + writeLock.unlock(); + } + } + + protected void setupConfigurableCapacities(QueueCapacities queueCapacities) { + CSQueueUtils.updateAndCheckCapacitiesByLabel(getQueuePath(), + queueCapacities, parent == null ? 
null : parent.getQueueCapacities()); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 140ea5dc903..4df4cf23d13 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -128,27 +128,34 @@ public AbstractCSQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { + this(cs, cs.getConfiguration(), queueName, parent, old); + } + + public AbstractCSQueue(CapacitySchedulerContext cs, + CapacitySchedulerConfiguration configuration, String queueName, + CSQueue parent, CSQueue old) { + this.labelManager = cs.getRMContext().getNodeLabelManager(); this.parent = parent; this.queueName = queueName; - this.queuePath = - ((parent == null) ? "" : (parent.getQueuePath() + ".")) + this.queueName; + this.queuePath = ((parent == null) ? "" : (parent.getQueuePath() + ".")) + + this.queueName; this.resourceCalculator = cs.getResourceCalculator(); this.activitiesManager = cs.getActivitiesManager(); - + // must be called after parent and queueName is set - this.metrics = - old != null ? (CSQueueMetrics) old.getMetrics() : CSQueueMetrics - .forQueue(getQueuePath(), parent, cs.getConfiguration() - .getEnableUserMetrics(), cs.getConf()); + this.metrics = old != null ? 
+ (CSQueueMetrics) old.getMetrics() : + CSQueueMetrics.forQueue(getQueuePath(), parent, + configuration.getEnableUserMetrics(), cs.getConf()); this.csContext = cs; this.minimumAllocation = csContext.getMinimumResourceCapability(); - + // initialize ResourceUsage queueUsage = new ResourceUsage(); queueEntity = new PrivilegedEntity(EntityType.QUEUE, getQueuePath()); - + // initialize QueueCapacities queueCapacities = new QueueCapacities(parent == null); @@ -159,11 +166,16 @@ public AbstractCSQueue(CapacitySchedulerContext cs, readLock = lock.readLock(); writeLock = lock.writeLock(); } - + protected void setupConfigurableCapacities() { + setupConfigurableCapacities(csContext.getConfiguration()); + } + + protected void setupConfigurableCapacities( + CapacitySchedulerConfiguration configuration) { CSQueueUtils.loadUpdateAndCheckCapacities( getQueuePath(), - csContext.getConfiguration(), + configuration, queueCapacities, parent == null ? null : parent.getQueueCapacities()); } @@ -275,6 +287,29 @@ void setMaxCapacity(float maximumCapacity) { } } + /** + * Set maximum capacity (and absolute maximum) for the given node label. + * @param maximumCapacity new max capacity + */ + void setMaxCapacity(String nodeLabel, float maximumCapacity) { + try { + writeLock.lock(); + // Sanity check against the capacities of the same node label + CSQueueUtils.checkMaxCapacity(getQueueName(), + queueCapacities.getCapacity(nodeLabel), maximumCapacity); + float absMaxCapacity = CSQueueUtils.computeAbsoluteMaximumCapacity( + maximumCapacity, parent); + CSQueueUtils.checkAbsoluteCapacity(getQueueName(), + queueCapacities.getAbsoluteCapacity(nodeLabel), absMaxCapacity); + + queueCapacities.setMaximumCapacity(nodeLabel, maximumCapacity); + queueCapacities.setAbsoluteMaximumCapacity(nodeLabel, absMaxCapacity); + } finally { + writeLock.unlock(); + } + } + + @Override public String getDefaultNodeLabelExpression() { return defaultLabelExpression; @@ -282,13 +317,20 @@ public String getDefaultNodeLabelExpression() { void setupQueueConfigs(Resource clusterResource) throws IOException { + 
setupQueueConfigs(clusterResource, csContext.getConfiguration()); + } + + protected void setupQueueConfigs(Resource clusterResource, + CapacitySchedulerConfiguration configuration) throws + IOException { + try { writeLock.lock(); // get labels this.accessibleLabels = - csContext.getConfiguration().getAccessibleNodeLabels(getQueuePath()); + configuration.getAccessibleNodeLabels(getQueuePath()); this.defaultLabelExpression = - csContext.getConfiguration().getDefaultNodeLabelExpression( + configuration.getDefaultNodeLabelExpression( getQueuePath()); this.resourceTypes = new HashSet(); for (AbsoluteResourceType type : AbsoluteResourceType.values()) { @@ -308,7 +350,7 @@ void setupQueueConfigs(Resource clusterResource) } // After we setup labels, we can setup capacities - setupConfigurableCapacities(); + setupConfigurableCapacities(configuration); // Also fetch minimum/maximum resource constraint for this queue if // configured. @@ -316,20 +358,20 @@ void setupQueueConfigs(Resource clusterResource) updateConfigurableResourceRequirement(getQueuePath(), clusterResource); this.maximumAllocation = - csContext.getConfiguration().getMaximumAllocationPerQueue( + configuration.getMaximumAllocationPerQueue( getQueuePath()); // initialized the queue state based on previous state, configured state // and its parent state. QueueState previous = getState(); - QueueState configuredState = csContext.getConfiguration() + QueueState configuredState = configuration .getConfiguredState(getQueuePath()); QueueState parentState = (parent == null) ? 
null : parent.getState(); initializeQueueState(previous, configuredState, parentState); authorizer = YarnAuthorizationProvider.getInstance(csContext.getConf()); - this.acls = csContext.getConfiguration().getAcls(getQueuePath()); + this.acls = configuration.getAcls(getQueuePath()); // Update metrics CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, @@ -361,18 +403,21 @@ void setupQueueConfigs(Resource clusterResource) this.reservationsContinueLooking = csContext.getConfiguration().getReservationContinueLook(); - this.preemptionDisabled = isQueueHierarchyPreemptionDisabled(this); + this.preemptionDisabled = isQueueHierarchyPreemptionDisabled(this, + configuration); - this.priority = csContext.getConfiguration().getQueuePriority( + this.priority = configuration.getQueuePriority( getQueuePath()); - this.userWeights = getUserWeightsFromHierarchy(); + this.userWeights = getUserWeightsFromHierarchy(configuration); } finally { writeLock.unlock(); } } - private Map getUserWeightsFromHierarchy() throws IOException { + private Map getUserWeightsFromHierarchy + (CapacitySchedulerConfiguration configuration) throws + IOException { Map unionInheritedWeights = new HashMap(); CSQueue parentQ = getParent(); if (parentQ != null) { @@ -381,9 +426,8 @@ void setupQueueConfigs(Resource clusterResource) } // Insert this queue's user's weights, overriding parent's user's weights if // there is overlap. 
- CapacitySchedulerConfiguration csConf = csContext.getConfiguration(); unionInheritedWeights.putAll( - csConf.getAllUserWeightsForQueue(getQueuePath())); + configuration.getAllUserWeightsForQueue(getQueuePath())); return unionInheritedWeights; } @@ -720,10 +764,11 @@ public QueueResourceQuotas getQueueResourceQuotas() { * * @return true if queue has preemption disabled, false otherwise */ - private boolean isQueueHierarchyPreemptionDisabled(CSQueue q) { - CapacitySchedulerConfiguration csConf = csContext.getConfiguration(); + private boolean isQueueHierarchyPreemptionDisabled(CSQueue q, + CapacitySchedulerConfiguration configuration) { boolean systemWidePreemption = - csConf.getBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, + csContext.getConfiguration() + .getBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, YarnConfiguration.DEFAULT_RM_SCHEDULER_ENABLE_MONITORS); CSQueue parentQ = q.getParent(); @@ -735,14 +780,14 @@ private boolean isQueueHierarchyPreemptionDisabled(CSQueue q) { // on, then q does not have preemption disabled (default=false, below) // unless the preemption_disabled property is explicitly set. if (parentQ == null) { - return csConf.getPreemptionDisabled(q.getQueuePath(), false); + return configuration.getPreemptionDisabled(q.getQueuePath(), false); } // If this is not the root queue, inherit the default value for the // preemption_disabled property from the parent. Preemptability will be // inherited from the parent's hierarchy unless explicitly overridden at // this level. 
- return csConf.getPreemptionDisabled(q.getQueuePath(), + return configuration.getPreemptionDisabled(q.getQueuePath(), parentQ.getPreemptionDisabled()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractManagedParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractManagedParentQueue.java index 46f5cf113ea..9d38f791e73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractManagedParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractManagedParentQueue.java @@ -17,13 +17,21 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common + .QueueEntitlement; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Comparator; import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; /** * A container class for automatically created child leaf queues. 
@@ -35,13 +43,12 @@ private static final Logger LOG = LoggerFactory.getLogger( AbstractManagedParentQueue.class); - protected AutoCreatedLeafQueueTemplate leafQueueTemplate; + protected AutoCreatedLeafQueueConfig leafQueueTemplate; + protected AutoCreatedQueueManagementPolicy queueManagementPolicy = null; public AbstractManagedParentQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { super(cs, queueName, parent, old); - - super.setupQueueConfigs(csContext.getClusterResource()); } @Override @@ -53,52 +60,18 @@ public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource) // Set new configs setupQueueConfigs(clusterResource); - // run reinitialize on each existing queue, to trigger absolute cap - // recomputations - for (CSQueue res : this.getChildQueues()) { - res.reinitialize(res, clusterResource); - } } finally { writeLock.unlock(); } } - /** - * Initialize leaf queue configs from template configurations specified on - * parent queue. - */ - protected AutoCreatedLeafQueueTemplate.Builder initializeLeafQueueConfigs - (String queuePath) { - - CapacitySchedulerConfiguration conf = csContext.getConfiguration(); - - AutoCreatedLeafQueueTemplate.Builder leafQueueTemplateBuilder = new - AutoCreatedLeafQueueTemplate.Builder(); - int maxApps = conf.getMaximumApplicationsPerQueue(queuePath); - if (maxApps < 0) { - maxApps = (int) ( - CapacitySchedulerConfiguration.DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS - * getAbsoluteCapacity()); - } - - int userLimit = conf.getUserLimit(queuePath); - float userLimitFactor = conf.getUserLimitFactor(queuePath); - leafQueueTemplateBuilder.userLimit(userLimit) - .userLimitFactor(userLimitFactor) - .maxApps(maxApps) - .maxAppsPerUser( - (int) (maxApps * (userLimit / 100.0f) * userLimitFactor)); - - return leafQueueTemplateBuilder; - } - /** * Add the specified child queue. 
* @param childQueue reference to the child queue to be added * @throws SchedulerDynamicEditException */ public void addChildQueue(CSQueue childQueue) - throws SchedulerDynamicEditException { + throws SchedulerDynamicEditException, IOException { try { writeLock.lock(); if (childQueue.getCapacity() > 0) { @@ -193,84 +166,69 @@ protected float sumOfChildAbsCapacities() { } } - public static class AutoCreatedLeafQueueTemplate { - - private QueueCapacities queueCapacities; - - private int maxApps; - private int maxAppsPerUser; - private int userLimit; - private float userLimitFactor; - - AutoCreatedLeafQueueTemplate(Builder builder) { - this.maxApps = builder.maxApps; - this.maxAppsPerUser = builder.maxAppsPerUser; - this.userLimit = builder.userLimit; - this.userLimitFactor = builder.userLimitFactor; - this.queueCapacities = builder.queueCapacities; - } - - public static class Builder { - private int maxApps; - private int maxAppsPerUser; + public AutoCreatedLeafQueueConfig getLeafQueueTemplate() { + return leafQueueTemplate; + } - private int userLimit; - private float userLimitFactor; + public AutoCreatedQueueManagementPolicy + getAutoCreatedQueueManagementPolicy() { + return queueManagementPolicy; + } - private QueueCapacities queueCapacities; + protected SortedMap getConfigurationsWithPrefix + (SortedMap sortedConfigs, String prefix) { + return sortedConfigs.subMap( prefix, prefix + Character.MAX_VALUE ); + } - Builder maxApps(int maxApplications) { - this.maxApps = maxApplications; - return this; - } + protected SortedMap sortCSConfigurations() { + SortedMap sortedConfigs = new TreeMap( + new Comparator() { + public int compare(String s1, String s2) { + return s1.compareToIgnoreCase(s2); + } - Builder maxAppsPerUser(int maxApplicationsPerUser) { - this.maxAppsPerUser = maxApplicationsPerUser; - return this; - } + }); - Builder userLimit(int usrLimit) { - this.userLimit = usrLimit; - return this; - } + for (final Iterator> iterator = + 
csContext.getConfiguration().iterator(); iterator.hasNext(); ) { + final Map.Entry confKeyValuePair = iterator.next(); + sortedConfigs.put(confKeyValuePair.getKey(), confKeyValuePair.getValue()); + } + return sortedConfigs; + } - Builder userLimitFactor(float ulf) { - this.userLimitFactor = ulf; - return this; - } + protected CapacitySchedulerConfiguration initializeLeafQueueConfigs(String + configPrefix) { - Builder capacities(QueueCapacities capacities) { - this.queueCapacities = capacities; - return this; - } + CapacitySchedulerConfiguration leafQueueConfigs = new + CapacitySchedulerConfiguration(new Configuration(false), false); - AutoCreatedLeafQueueTemplate build() { - return new AutoCreatedLeafQueueTemplate(this); - } - } + SortedMap sortedConfigs = sortCSConfigurations(); + SortedMap templateConfigs = getConfigurationsWithPrefix + (sortedConfigs, configPrefix); - public int getUserLimit() { - return userLimit; + for (final Iterator> iterator = + templateConfigs.entrySet().iterator(); iterator.hasNext(); ) { + Map.Entry confKeyValuePair = iterator.next(); + leafQueueConfigs.set(confKeyValuePair.getKey(), + confKeyValuePair.getValue()); } - public float getUserLimitFactor() { - return userLimitFactor; - } + return leafQueueConfigs; + } - public QueueCapacities getQueueCapacities() { - return queueCapacities; - } + protected void validateQueueEntitlementChange(AbstractAutoCreatedLeafQueue + leafQueue, QueueEntitlement entitlement) + throws SchedulerDynamicEditException { - public int getMaxApps() { - return maxApps; - } + float sumChilds = sumOfChildCapacities(); + float newChildCap = + sumChilds - leafQueue.getCapacity() + entitlement.getCapacity(); - public int getMaxAppsPerUser() { - return maxAppsPerUser; + if (!(newChildCap >= 0 && newChildCap < 1.0f + CSQueueUtils.EPSILON)) { + throw new SchedulerDynamicEditException( + "Sum of child queues should not exceed 100% for auto creating " + + "parent queue : " + queueName); } } - - public 
AutoCreatedLeafQueueTemplate getLeafQueueTemplate() { - return leafQueueTemplate; - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java index bc206d41521..1d796ad8ae7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java @@ -21,36 +21,27 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity - .AbstractManagedParentQueue.AutoCreatedLeafQueueTemplate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; /** - * Leaf queues which are auto created by an underkying implementation of + * Leaf queues which are auto created by an underlying implementation of * AbstractManagedParentQueue. 
Eg: PlanQueue for reservations or * ManagedParentQueue for auto created dynamic queues */ -public class AutoCreatedLeafQueue extends LeafQueue { +public class AutoCreatedLeafQueue extends AbstractAutoCreatedLeafQueue { private static final Logger LOG = LoggerFactory .getLogger(AutoCreatedLeafQueue.class); - private AbstractManagedParentQueue parent; - public AutoCreatedLeafQueue(CapacitySchedulerContext cs, String queueName, - AbstractManagedParentQueue parent) throws IOException { - super(cs, queueName, parent, null); - - AutoCreatedLeafQueueTemplate leafQueueTemplate = - parent.getLeafQueueTemplate(); - updateApplicationAndUserLimits(leafQueueTemplate.getUserLimit(), - leafQueueTemplate.getUserLimitFactor(), - leafQueueTemplate.getMaxApps(), - leafQueueTemplate.getMaxAppsPerUser()); - this.parent = parent; + ManagedParentQueue parent) throws IOException { + super(cs, parent.getLeafQueueConfigs(queueName), + queueName, + parent, null); + updateCapacitiesToZero(); } @Override @@ -61,48 +52,75 @@ public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource) validate(newlyParsedQueue); - super.reinitialize(newlyParsedQueue, clusterResource); - CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, - this, labelManager, null); + ManagedParentQueue managedParentQueue = (ManagedParentQueue) parent; - AutoCreatedLeafQueueTemplate leafQueueTemplate = - parent.getLeafQueueTemplate(); - updateApplicationAndUserLimits(leafQueueTemplate.getUserLimit(), - leafQueueTemplate.getUserLimitFactor(), - leafQueueTemplate.getMaxApps(), - leafQueueTemplate.getMaxAppsPerUser()); + super.reinitialize(newlyParsedQueue, clusterResource, managedParentQueue + .getLeafQueueConfigs(newlyParsedQueue.getQueueName())); + + // Reset capacities to 0 since the reinitialize above sets + // queueCapacities to the configured capacity, which might + // overcommit resources from the parent queue + updateCapacitiesToZero(); } finally { writeLock.unlock(); } } - /** - * This 
methods to change capacity for a queue and adjusts its - * absoluteCapacity. - * - * @param entitlement the new entitlement for the queue (capacity, - * maxCapacity) - * @throws SchedulerDynamicEditException - */ - public void setEntitlement(QueueEntitlement entitlement) - throws SchedulerDynamicEditException { + public void reinitializeFromTemplate(AutoCreatedLeafQueueConfig + leafQueueTemplate) throws SchedulerDynamicEditException, IOException { + try { writeLock.lock(); - float capacity = entitlement.getCapacity(); + + // TODO: + // reinitialize only capacities for now since 0 capacity updates + // can cause + // abs capacity related config computations to be incorrect if we go + // through reinitialize + QueueCapacities capacities = leafQueueTemplate.getQueueCapacities(); + + //update abs capacities + setupConfigurableCapacities(capacities); + + //reset capacities for the leaf queue + mergeCapacities(capacities); + + //update queue used capacity for all the node labels + CSQueueUtils.updateQueueStatistics(resourceCalculator, + csContext.getClusterResource(), + this, labelManager, null); + + //activate applications if any are pending + activateApplications(); + + } finally { + writeLock.unlock(); + } + } + + private void mergeCapacities(QueueCapacities capacities) { + for ( String nodeLabel : capacities.getExistingNodeLabels()) { + this.queueCapacities.setCapacity(nodeLabel, + capacities.getCapacity(nodeLabel)); + this.queueCapacities.setAbsoluteCapacity(nodeLabel, capacities + .getAbsoluteCapacity(nodeLabel)); + this.queueCapacities.setMaximumCapacity(nodeLabel, capacities + .getMaximumCapacity(nodeLabel)); + this.queueCapacities.setAbsoluteMaximumCapacity(nodeLabel, capacities + .getAbsoluteMaximumCapacity(nodeLabel)); + } + } + + public void validateConfigurations(AutoCreatedLeafQueueConfig template) + throws SchedulerDynamicEditException { + QueueCapacities capacities = template.getQueueCapacities(); + for (String label : capacities.getExistingNodeLabels()) 
{ + float capacity = capacities.getCapacity(label); if (capacity < 0 || capacity > 1.0f) { throw new SchedulerDynamicEditException( "Capacity demand is not in the [0,1] range: " + capacity); } - setCapacity(capacity); - setAbsoluteCapacity(getParent().getAbsoluteCapacity() * getCapacity()); - setMaxCapacity(entitlement.getMaxCapacity()); - if (LOG.isDebugEnabled()) { - LOG.debug("successfully changed to " + capacity + " for queue " + this - .getQueueName()); - } - } finally { - writeLock.unlock(); } } @@ -113,22 +131,20 @@ private void validate(final CSQueue newlyParsedQueue) throws IOException { "Error trying to reinitialize " + getQueuePath() + " from " + newlyParsedQueue.getQueuePath()); } - - } - - @Override - protected void setupConfigurableCapacities() { - CSQueueUtils.updateAndCheckCapacitiesByLabel(getQueuePath(), - queueCapacities, parent == null ? null : parent.getQueueCapacities()); } - private void updateApplicationAndUserLimits(int userLimit, - float userLimitFactor, - int maxAppsForAutoCreatedQueues, - int maxAppsPerUserForAutoCreatedQueues) { - setUserLimit(userLimit); - setUserLimitFactor(userLimitFactor); - setMaxApplications(maxAppsForAutoCreatedQueues); - setMaxApplicationsPerUser(maxAppsPerUserForAutoCreatedQueues); + private void updateCapacitiesToZero() throws IOException { + try { + for( String nodeLabel : parent.getQueueCapacities().getExistingNodeLabels + ()) { + //TODO - update to use getMaximumCapacity(nodeLabel) in YARN-7574 + setEntitlement(nodeLabel, new QueueEntitlement(0.0f, + parent.getLeafQueueTemplate() + .getQueueCapacities() + .getMaximumCapacity())); + } + } catch (SchedulerDynamicEditException e) { + throw new IOException(e); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java new file mode 100644 index 00000000000..5952250a02a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +/** + * Auto Created Leaf queue configurations, capacity + */ +public class AutoCreatedLeafQueueConfig { + + /** + * Template queue capacities - contains configured and derived capacities + * like abs capacity which are used by auto queue creation policy to manage + * leaf queue capacities + */ + private QueueCapacities queueCapacities; + + private CapacitySchedulerConfiguration leafQueueConfigs; + + public AutoCreatedLeafQueueConfig(Builder builder) { + this.queueCapacities = builder.queueCapacities; + this.leafQueueConfigs = builder.leafQueueConfigs; + } + + public static class Builder { + + private QueueCapacities queueCapacities; + private CapacitySchedulerConfiguration leafQueueConfigs; + + public Builder capacities(QueueCapacities capacities) { + this.queueCapacities = capacities; + return this; + } + + public Builder configuration(CapacitySchedulerConfiguration conf) { + this.leafQueueConfigs = conf; + return this; + } + + public AutoCreatedLeafQueueConfig build() { + return new AutoCreatedLeafQueueConfig(this); + } + } + + public QueueCapacities getQueueCapacities() { + return queueCapacities; + } + + public CapacitySchedulerConfiguration getLeafQueueConfigs() { + return leafQueueConfigs; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedQueueManagementPolicy.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedQueueManagementPolicy.java new file mode 100644 index 00000000000..f7a4bbdeee3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedQueueManagementPolicy.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; +import java.util.List; + +public interface AutoCreatedQueueManagementPolicy { + + /** + * Initialize policy + * @param schedulerContext Capacity Scheduler context + */ + void init(CapacitySchedulerContext schedulerContext, ParentQueue parentQueue); + + /** + * Reinitialize policy state ( if required ) + * @param schedulerContext Capacity Scheduler context + */ + void reinitialize(CapacitySchedulerContext schedulerContext, + ParentQueue parentQueue); + + /** + * Get initial template for the specified leaf queue + * @param leafQueue the leaf queue + * @return initial leaf queue template configurations and capacities for + * auto created queue + */ + AutoCreatedLeafQueueConfig getInitialLeafQueueConfiguration( + AbstractAutoCreatedLeafQueue leafQueue) + throws SchedulerDynamicEditException; + + /** + * Compute/Adjust child queue capacities + * for auto created leaf queues + * + * @return returns a list of suggested QueueEntitlementChange(s) which may + * or may not be enforced by the scheduler + */ + List computeQueueManagementChanges() + throws SchedulerDynamicEditException; + + /** + * Commit/Update state for the specified queue management changes. 
+ */ + void commitQueueManagementChanges( + List queueManagementChanges) + throws SchedulerDynamicEditException; +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java index 6d79b6a3ce5..5dd307c05a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java @@ -230,7 +230,7 @@ public void completedContainer(Resource clusterResource, * @param newlyParsedQueue new queue to re-initalize from * @param clusterResource resources in the cluster */ - public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource) + public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource) throws IOException; /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java index 6daca5158a3..7b35550f939 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java @@ -29,9 +29,9 @@ import com.google.common.collect.Sets; -class CSQueueUtils { +public class CSQueueUtils { - final static float EPSILON = 0.0001f; + public final static float EPSILON = 0.0001f; /* * Used only by tests @@ -133,12 +133,12 @@ private static void loadCapacitiesByLabelsFromConf(String queuePath, for (String label : configuredNodelabels) { if (label.equals(CommonNodeLabelsManager.NO_LABEL)) { - queueCapacities.setCapacity(CommonNodeLabelsManager.NO_LABEL, + queueCapacities.setCapacity(label, csConf.getNonLabeledQueueCapacity(queuePath) / 100); - queueCapacities.setMaximumCapacity(CommonNodeLabelsManager.NO_LABEL, + queueCapacities.setMaximumCapacity(label, csConf.getNonLabeledQueueMaximumCapacity(queuePath) / 100); queueCapacities.setMaxAMResourcePercentage( - CommonNodeLabelsManager.NO_LABEL, + label, csConf.getMaximumAMResourcePercentPerPartition(queuePath, label)); } else { queueCapacities.setCapacity(label, @@ -193,9 +193,32 @@ public static void updateUsedCapacity(final ResourceCalculator rc, if (Resources.greaterThan(rc, totalPartitionResource, totalPartitionResource, Resources.none())) { + Resource queueGuranteedResource = childQueue .getEffectiveCapacity(nodePartition); + //TODO : Modify below code to support Absolute Resource configurations + // (YARN-5881) for AutoCreatedLeafQueue + if (Float.compare(queueCapacities.getAbsoluteCapacity + (nodePartition), 0f) == 0 + && childQueue instanceof AutoCreatedLeafQueue) { + + //If absolute capacity is 0 for a leaf queue (could be a managed leaf + // queue, then use the leaf queue's template capacity to compute + // guaranteed resource for used capacity) + + // queueGuaranteed = totalPartitionedResource * + // absolute_capacity(partition) + ManagedParentQueue parentQueue = (ManagedParentQueue) + childQueue.getParent(); + 
QueueCapacities leafQueueTemplateCapacities = parentQueue + .getLeafQueueTemplate() + .getQueueCapacities(); + queueGuranteedResource = Resources.multiply(totalPartitionResource, + leafQueueTemplateCapacities.getAbsoluteCapacity + (nodePartition)); + } + // make queueGuranteed >= minimum_allocation to avoid divided by 0. queueGuranteedResource = Resources.max(rc, totalPartitionResource, queueGuranteedResource, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index de93a6a2480..a5efd9f4c81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -70,6 +70,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.placement.UserGroupMappingPlacementRule; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData; + import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationConstants; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; @@ -129,6 +130,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .QueueManagementChangeEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ReleaseContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; @@ -138,6 +141,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.AppPriorityACLsManager; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.Lock; +import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -175,6 +179,8 @@ private CSConfigurationProvider csConfProvider; + protected Clock monotonicClock; + @Override public void setConf(Configuration conf) { yarnConf = conf; @@ -1501,7 +1507,7 @@ public void handle(SchedulerEvent event) { { NodeLabelsUpdateSchedulerEvent labelUpdateEvent = (NodeLabelsUpdateSchedulerEvent) event; - + updateNodeLabelsAndQueueResource(labelUpdateEvent); } break; @@ -1613,6 +1619,25 @@ public void handle(SchedulerEvent event) { } } break; + case MANAGE_QUEUE: + { + QueueManagementChangeEvent queueManagementChangeEvent = + (QueueManagementChangeEvent) event; + ParentQueue parentQueue = queueManagementChangeEvent.getParentQueue(); + try { + final List queueManagementChanges = + queueManagementChangeEvent.getQueueManagementChanges(); + ((ManagedParentQueue) parentQueue) + .validateAndApplyQueueManagementChanges(queueManagementChanges); + } catch (SchedulerDynamicEditException sde) { + LOG.error("Queue Management Change event cannot be applied for " + + "parent queue : " + parentQueue.getQueueName(), sde); + } catch (IOException ioe) { + LOG.error("Queue Management Change event cannot be applied for " + + "parent 
queue : " + parentQueue.getQueueName(), ioe); + } + } + break; default: LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!"); } @@ -1976,12 +2001,14 @@ public void removeQueue(String queueName) writeLock.lock(); LOG.info("Removing queue: " + queueName); CSQueue q = this.getQueue(queueName); - if (!(q instanceof AutoCreatedLeafQueue)) { + if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom( + q.getClass()))) { throw new SchedulerDynamicEditException( "The queue that we are asked " + "to remove (" + queueName - + ") is not a AutoCreatedLeafQueue"); + + ") is not a AutoCreatedLeafQueue or ReservationQueue"); } - AutoCreatedLeafQueue disposableLeafQueue = (AutoCreatedLeafQueue) q; + AbstractAutoCreatedLeafQueue disposableLeafQueue = + (AbstractAutoCreatedLeafQueue) q; // at this point we should have no more apps if (disposableLeafQueue.getNumApplications() > 0) { throw new SchedulerDynamicEditException( @@ -1994,8 +2021,8 @@ public void removeQueue(String queueName) ((AbstractManagedParentQueue) disposableLeafQueue.getParent()) .removeChildQueue(q); this.queueManager.removeQueue(queueName); - LOG.info("Removal of AutoCreatedLeafQueue " - + queueName + " has succeeded"); + LOG.info( + "Removal of AutoCreatedLeafQueue " + queueName + " has succeeded"); } finally { writeLock.unlock(); } @@ -2003,22 +2030,27 @@ public void removeQueue(String queueName) @Override public void addQueue(Queue queue) - throws SchedulerDynamicEditException { + throws SchedulerDynamicEditException, IOException { try { writeLock.lock(); - if (!(queue instanceof AutoCreatedLeafQueue)) { + if (queue == null) { + throw new SchedulerDynamicEditException( + "Queue specified is null. 
Should be an implementation of " + + "AbstractAutoCreatedLeafQueue"); + } else if (!(AbstractAutoCreatedLeafQueue.class + .isAssignableFrom(queue.getClass()))) { throw new SchedulerDynamicEditException( - "Queue " + queue.getQueueName() + " is not a AutoCreatedLeafQueue"); + "Queue is not an implementation of " + + "AbstractAutoCreatedLeafQueue : " + queue.getClass()); } - AutoCreatedLeafQueue newQueue = (AutoCreatedLeafQueue) queue; + AbstractAutoCreatedLeafQueue newQueue = + (AbstractAutoCreatedLeafQueue) queue; - if (newQueue.getParent() == null - || !(AbstractManagedParentQueue.class. + if (newQueue.getParent() == null || !(AbstractManagedParentQueue.class. isAssignableFrom(newQueue.getParent().getClass()))) { throw new SchedulerDynamicEditException( - "ParentQueue for " + newQueue.getQueueName() - + " is not properly set" + "ParentQueue for " + newQueue + " is not properly set" + " (should be set and be a PlanQueue or ManagedParentQueue)"); } @@ -2027,6 +2059,7 @@ public void addQueue(Queue queue) String queuename = newQueue.getQueueName(); parentPlan.addChildQueue(newQueue); this.queueManager.addQueue(queuename, newQueue); + LOG.info("Creation of AutoCreatedLeafQueue " + newQueue + " succeeded"); } finally { writeLock.unlock(); @@ -2039,48 +2072,32 @@ public void setEntitlement(String inQueue, QueueEntitlement entitlement) try { writeLock.lock(); LeafQueue queue = this.queueManager.getAndCheckLeafQueue(inQueue); - AbstractManagedParentQueue parent = (AbstractManagedParentQueue) queue - .getParent(); + AbstractManagedParentQueue parent = + (AbstractManagedParentQueue) queue.getParent(); - if (!(queue instanceof AutoCreatedLeafQueue)) { + if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom( + queue.getClass()))) { throw new SchedulerDynamicEditException( "Entitlement can not be" + " modified dynamically since queue " + inQueue + " is not a AutoCreatedLeafQueue"); } - if (parent == null - || !(AbstractManagedParentQueue.class.isAssignableFrom( - 
parent.getClass()))) { + if (parent == null || !(AbstractManagedParentQueue.class.isAssignableFrom( + parent.getClass()))) { throw new SchedulerDynamicEditException( "The parent of AutoCreatedLeafQueue " + inQueue + " must be a PlanQueue/ManagedParentQueue"); } - AutoCreatedLeafQueue newQueue = (AutoCreatedLeafQueue) queue; + AbstractAutoCreatedLeafQueue newQueue = + (AbstractAutoCreatedLeafQueue) queue; + parent.validateQueueEntitlementChange(newQueue, entitlement); - float sumChilds = parent.sumOfChildCapacities(); - float newChildCap = - sumChilds - queue.getCapacity() + entitlement.getCapacity(); + newQueue.setEntitlement(entitlement); - if (newChildCap >= 0 && newChildCap < 1.0f + CSQueueUtils.EPSILON) { - // note: epsilon checks here are not ok, as the epsilons might - // accumulate and become a problem in aggregate - if (Math.abs(entitlement.getCapacity() - queue.getCapacity()) == 0 - && Math.abs( - entitlement.getMaxCapacity() - queue.getMaximumCapacity()) == 0) { - return; - } - newQueue.setEntitlement(entitlement); - } else{ - throw new SchedulerDynamicEditException( - "Sum of child queues should exceed 100% for auto creating parent " - + "queue : " + parent.getQueueName()); - } - LOG.info( - "Set entitlement for AutoCreatedLeafQueue " + inQueue - + " to " + queue.getCapacity() + - " request was (" + entitlement.getCapacity() - + ")"); + LOG.info("Set entitlement for AutoCreatedLeafQueue " + inQueue + " to " + + queue.getCapacity() + " request was (" + entitlement.getCapacity() + + ")"); } finally { writeLock.unlock(); } @@ -2718,7 +2735,6 @@ private LeafQueue autoCreateLeafQueue( addQueue(autoCreatedLeafQueue); - //TODO - Set entitlement through capacity management policy } else{ throw new SchedulerDynamicEditException( "Could not auto-create leaf queue for " + leafQueueName diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index a33d81aff4b..8aa41ee4a72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -923,6 +923,11 @@ public int getNodeLocalityDelay() { return getInt(NODE_LOCALITY_DELAY, DEFAULT_NODE_LOCALITY_DELAY); } + @VisibleForTesting + public void setNodeLocalityDelay(int nodeLocalityDelay) { + setInt(NODE_LOCALITY_DELAY, nodeLocalityDelay); + } + public int getRackLocalityAdditionalDelay() { return getInt(RACK_LOCALITY_ADDITIONAL_DELAY, DEFAULT_RACK_LOCALITY_ADDITIONAL_DELAY); @@ -1401,6 +1406,10 @@ public int getGlobalMaximumApplicationsPerQueue() { return maxApplicationsPerQueue; } + public void setGlobalMaximumApplicationsPerQueue(int val) { + setInt(QUEUE_GLOBAL_MAX_APPLICATION, val); + } + /** * Ordering policy inside a parent queue to sort queues */ @@ -1620,9 +1629,13 @@ public void setDefaultLifetimePerQueue(String queue, long defaultLifetime) { @Private public static final boolean DEFAULT_AUTO_CREATE_CHILD_QUEUE_ENABLED = false; + @Private + private static final String AUTO_CREATE_CHILD_QUEUE_PREFIX = + "auto-create-child-queue."; + @Private public static final String AUTO_CREATE_CHILD_QUEUE_ENABLED = - "auto-create-child-queue.enabled"; + AUTO_CREATE_CHILD_QUEUE_PREFIX + "enabled"; @Private public 
static final String AUTO_CREATED_LEAF_QUEUE_TEMPLATE_PREFIX = @@ -1722,8 +1735,83 @@ public int getAutoCreatedQueuesMaxChildQueuesLimit(String queuePath) { } @Private + public static final String AUTO_CREATED_QUEUE_MANAGEMENT_POLICY = + AUTO_CREATE_CHILD_QUEUE_PREFIX + "management-policy"; + + @Private + public static final String DEFAULT_AUTO_CREATED_QUEUE_MANAGEMENT_POLICY = + "org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity" + + ".queuemanagement." + + "GuaranteedOrZeroCapacityOverTimePolicy"; + + @Private + private static final String QUEUE_MANAGEMENT_CONFIG_PREFIX = + "yarn.resourcemanager.monitor.capacity.queue-management."; + + /** + * Time in milliseconds between invocations of this policy + */ + @Private + public static final String QUEUE_MANAGEMENT_MONITORING_INTERVAL = + QUEUE_MANAGEMENT_CONFIG_PREFIX + "monitoring-interval"; + + @Private + public static final long DEFAULT_QUEUE_MANAGEMENT_MONITORING_INTERVAL = + 1500L; + + /** + * Queue Management computation policy for Auto Created queues + * @param queue The queue's path + * @return Configured policy class name + */ + @Private + public String getAutoCreatedQueueManagementPolicy(String queue) { + String autoCreatedQueueManagementPolicy = + get(getQueuePrefix(queue) + AUTO_CREATED_QUEUE_MANAGEMENT_POLICY, + DEFAULT_AUTO_CREATED_QUEUE_MANAGEMENT_POLICY); + return autoCreatedQueueManagementPolicy; + } + + /** + * Get The policy class configured to manage capacities for auto created leaf + * queues under the specified parent + * + * @param queueName The parent queue's name + * @return The policy class configured to manage capacities for auto created + * leaf queues under the specified parent queue + */ + @Private + protected AutoCreatedQueueManagementPolicy + getAutoCreatedQueueManagementPolicyClass( + String queueName) { + + String queueManagementPolicyClassName = + getAutoCreatedQueueManagementPolicy(queueName); + LOG.info("Using Auto Created Queue Management Policy: " + + 
queueManagementPolicyClassName + " for queue: " + queueName); + try { + Class queueManagementPolicyClazz = getClassByName( + queueManagementPolicyClassName); + if (AutoCreatedQueueManagementPolicy.class.isAssignableFrom( + queueManagementPolicyClazz)) { + return (AutoCreatedQueueManagementPolicy) ReflectionUtils.newInstance( + queueManagementPolicyClazz, this); + } else{ + throw new YarnRuntimeException( + "Class: " + queueManagementPolicyClassName + " not instance of " + + AutoCreatedQueueManagementPolicy.class.getCanonicalName()); + } + } catch (ClassNotFoundException e) { + throw new YarnRuntimeException( + "Could not instantiate " + "AutoCreatedQueueManagementPolicy: " + + queueManagementPolicyClassName + " for queue: " + queueName, + e); + } + } + @VisibleForTesting - public void setAutoCreatedLeafQueueTemplateCapacity(String queuePath, + @Private + public void setAutoCreatedLeafQueueConfigCapacity(String queuePath, float val) { String leafQueueConfPrefix = getAutoCreatedQueueTemplateConfPrefix( queuePath); @@ -1732,13 +1820,31 @@ public void setAutoCreatedLeafQueueTemplateCapacity(String queuePath, @Private @VisibleForTesting - public void setAutoCreatedLeafQueueTemplateMaxCapacity(String queuePath, + public void setAutoCreatedLeafQueueConfigMaxCapacity(String queuePath, float val) { String leafQueueConfPrefix = getAutoCreatedQueueTemplateConfPrefix( queuePath); setMaximumCapacity(leafQueueConfPrefix, val); } + @VisibleForTesting + @Private + public void setAutoCreatedLeafQueueConfigUserLimit(String queuePath, + int val) { + String leafQueueConfPrefix = getAutoCreatedQueueTemplateConfPrefix( + queuePath); + setUserLimit(leafQueueConfPrefix, val); + } + + @VisibleForTesting + @Private + public void setAutoCreatedLeafQueueConfigUserLimitFactor(String queuePath, + float val) { + String leafQueueConfPrefix = getAutoCreatedQueueTemplateConfPrefix( + queuePath); + setUserLimitFactor(leafQueueConfPrefix, val); + } + public static String getUnits(String 
resourceValue) { String units; for (int i = 0; i < resourceValue.length(); i++) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerContext.java index 7c918a53620..ae74989a726 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerContext.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; /** @@ -94,4 +95,11 @@ * @return if configuration is mutable */ boolean isConfigurationMutable(); + + /** + * Get clock from scheduler + * @return Clock + */ + Clock getClock(); + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java index eb501233b2b..30ecd400dc9 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerQueueManager.java @@ -239,7 +239,7 @@ static CSQueue parseQueue( queueName + ReservationConstants.DEFAULT_QUEUE_SUFFIX; List childQueues = new ArrayList<>(); - AutoCreatedLeafQueue resQueue = new AutoCreatedLeafQueue(csContext, + ReservationQueue resQueue = new ReservationQueue(csContext, defReservationId, (PlanQueue) queue); try { resQueue.setEntitlement(new QueueEntitlement(1.0f, 1.0f)); @@ -312,7 +312,8 @@ private void validateQueueHierarchy(Map queues, Map newQueues) throws IOException { // check that all static queues are included in the newQueues list for (Map.Entry e : queues.entrySet()) { - if (!(e.getValue() instanceof AutoCreatedLeafQueue)) { + if (!(AbstractAutoCreatedLeafQueue.class.isAssignableFrom(e.getValue() + .getClass()))) { String queueName = e.getKey(); CSQueue oldQueue = e.getValue(); CSQueue newQueue = newQueues.get(queueName); @@ -394,7 +395,6 @@ private void updateQueues(Map existingQueues, String queueName = e.getKey(); CSQueue existingQueue = e.getValue(); - //TODO - Handle case when auto create is disabled on parent queues if (!newQueues.containsKey(queueName) && !( existingQueue instanceof AutoCreatedLeafQueue && conf .isAutoCreateChildQueueEnabled( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 41ec4ba762f..86fcbc9afa3 100644 
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -138,7 +138,14 @@ @SuppressWarnings({ "unchecked", "rawtypes" }) public LeafQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { - super(cs, queueName, parent, old); + this(cs, cs.getConfiguration(), queueName, parent, old); + } + + public LeafQueue(CapacitySchedulerContext cs, + CapacitySchedulerConfiguration configuration, + String queueName, CSQueue parent, CSQueue old) throws + IOException { + super(cs, configuration, queueName, parent, old); this.scheduler = cs; this.usersManager = new UsersManager(metrics, this, labelManager, scheduler, @@ -149,17 +156,25 @@ public LeafQueue(CapacitySchedulerContext cs, if(LOG.isDebugEnabled()) { LOG.debug("LeafQueue:" + " name=" + queueName - + ", fullname=" + getQueuePath()); + + ", fullname=" + getQueuePath()); } - setupQueueConfigs(cs.getClusterResource()); + setupQueueConfigs(cs.getClusterResource(), configuration); + } protected void setupQueueConfigs(Resource clusterResource) throws IOException { + setupQueueConfigs(clusterResource, csContext.getConfiguration()); + } + + protected void setupQueueConfigs(Resource clusterResource, + CapacitySchedulerConfiguration conf) throws + IOException { try { writeLock.lock(); - super.setupQueueConfigs(clusterResource); + CapacitySchedulerConfiguration schedConf = csContext.getConfiguration(); + super.setupQueueConfigs(clusterResource, conf); this.lastClusterResource = clusterResource; @@ -173,8 +188,6 @@ protected void setupQueueConfigs(Resource clusterResource) // absoluteMaxAvailCapacity during headroom/userlimit/allocation events) 
setQueueResourceLimitsInfo(clusterResource); - CapacitySchedulerConfiguration conf = csContext.getConfiguration(); - setOrderingPolicy( conf.getAppOrderingPolicy(getQueuePath())); @@ -183,11 +196,13 @@ protected void setupQueueConfigs(Resource clusterResource) maxApplications = conf.getMaximumApplicationsPerQueue(getQueuePath()); if (maxApplications < 0) { - int maxGlobalPerQueueApps = conf.getGlobalMaximumApplicationsPerQueue(); + int maxGlobalPerQueueApps = schedConf + .getGlobalMaximumApplicationsPerQueue(); if (maxGlobalPerQueueApps > 0) { maxApplications = maxGlobalPerQueueApps; } else { - int maxSystemApps = conf.getMaximumSystemApplications(); + int maxSystemApps = schedConf. + getMaximumSystemApplications(); maxApplications = (int) (maxSystemApps * queueCapacities.getAbsoluteCapacity()); } @@ -218,9 +233,11 @@ protected void setupQueueConfigs(Resource clusterResource) .join(getAccessibleNodeLabels().iterator(), ','))); } - nodeLocalityDelay = conf.getNodeLocalityDelay(); - rackLocalityAdditionalDelay = conf.getRackLocalityAdditionalDelay(); - rackLocalityFullReset = conf.getRackLocalityFullReset(); + nodeLocalityDelay = schedConf.getNodeLocalityDelay(); + rackLocalityAdditionalDelay = schedConf + .getRackLocalityAdditionalDelay(); + rackLocalityFullReset = schedConf + .getRackLocalityFullReset(); // re-init this since max allocation could have changed this.minimumAllocationFactor = Resources.ratio(resourceCalculator, @@ -507,10 +524,11 @@ public User getUser(String userName) { } } - @Override - public void reinitialize( - CSQueue newlyParsedQueue, Resource clusterResource) - throws IOException { + protected void reinitialize( + CSQueue newlyParsedQueue, Resource clusterResource, + CapacitySchedulerConfiguration configuration) throws + IOException { + try { writeLock.lock(); // Sanity check @@ -535,7 +553,7 @@ public void reinitialize( + newMax); } - setupQueueConfigs(clusterResource); + setupQueueConfigs(clusterResource, configuration); // queue metrics are 
updated, more resource may be available // activate the pending applications if possible @@ -546,6 +564,14 @@ public void reinitialize( } } + @Override + public void reinitialize( + CSQueue newlyParsedQueue, Resource clusterResource) + throws IOException { + reinitialize(newlyParsedQueue, clusterResource, + csContext.getConfiguration()); + } + @Override public void submitApplicationAttempt(FiCaSchedulerApp application, String userName) { @@ -731,7 +757,7 @@ public Resource calculateAndGetAMResourceLimitPerPartition( } } - private void activateApplications() { + protected void activateApplications() { try { writeLock.lock(); // limit of allowed resource usage for application masters @@ -1991,10 +2017,18 @@ public void setCapacity(float capacity) { queueCapacities.setCapacity(capacity); } + public void setCapacity(String nodeLabel, float capacity) { + queueCapacities.setCapacity(nodeLabel, capacity); + } + public void setAbsoluteCapacity(float absoluteCapacity) { queueCapacities.setAbsoluteCapacity(absoluteCapacity); } + public void setAbsoluteCapacity(String nodeLabel, float absoluteCapacity) { + queueCapacities.setAbsoluteCapacity(nodeLabel, absoluteCapacity); + } + public void setMaxApplicationsPerUser(int maxApplicationsPerUser) { this.maxApplicationsPerUser = maxApplicationsPerUser; } @@ -2002,7 +2036,12 @@ public void setMaxApplicationsPerUser(int maxApplicationsPerUser) { public void setMaxApplications(int maxApplications) { this.maxApplications = maxApplications; } - + + public void setMaxAMResourcePerQueuePercent( + float maxAMResourcePerQueuePercent) { + this.maxAMResourcePerQueuePercent = maxAMResourcePerQueuePercent; + } + public OrderingPolicy getOrderingPolicy() { return orderingPolicy; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java index ff795e47b23..cbdb21d9498 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java @@ -17,13 +17,23 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler .SchedulerDynamicEditException; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica + .FiCaSchedulerApp; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; /** * Auto Creation enabled Parent queue. 
This queue initially does not have any @@ -44,54 +54,125 @@ public ManagedParentQueue(final CapacitySchedulerContext cs, final String queueName, final CSQueue parent, final CSQueue old) throws IOException { super(cs, queueName, parent, old); - String leafQueueTemplateConfPrefix = getLeafQueueConfigPrefix( - csContext.getConfiguration()); - this.leafQueueTemplate = initializeLeafQueueConfigs( - leafQueueTemplateConfPrefix).build(); + + shouldFailAutoCreationWhenGuaranteedCapacityExceeded = + csContext.getConfiguration() + .getShouldFailAutoQueueCreationWhenGuaranteedCapacityExceeded( + getQueuePath()); + + leafQueueTemplate = initializeLeafQueueConfigs().build(); StringBuffer queueInfo = new StringBuffer(); queueInfo.append("Created Managed Parent Queue: ").append(queueName).append( "]\nwith capacity: [").append(super.getCapacity()).append( "]\nwith max capacity: [").append(super.getMaximumCapacity()).append( - "\nwith max apps: [").append(leafQueueTemplate.getMaxApps()).append( - "]\nwith max apps per user: [").append( - leafQueueTemplate.getMaxAppsPerUser()).append("]\nwith user limit: [") - .append(leafQueueTemplate.getUserLimit()).append( - "]\nwith user limit factor: [").append( - leafQueueTemplate.getUserLimitFactor()).append("]."); + "]."); LOG.info(queueInfo.toString()); + + initializeQueueManagementPolicy(); } @Override public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource) throws IOException { - validate(newlyParsedQueue); - super.reinitialize(newlyParsedQueue, clusterResource); - String leafQueueTemplateConfPrefix = getLeafQueueConfigPrefix( - csContext.getConfiguration()); - this.leafQueueTemplate = initializeLeafQueueConfigs( - leafQueueTemplateConfPrefix).build(); + + try { + writeLock.lock(); + validate(newlyParsedQueue); + + shouldFailAutoCreationWhenGuaranteedCapacityExceeded = + csContext.getConfiguration() + .getShouldFailAutoQueueCreationWhenGuaranteedCapacityExceeded( + getQueuePath()); + + //validate if capacity is 
exceeded for child queues + if (shouldFailAutoCreationWhenGuaranteedCapacityExceeded) { + float childCap = sumOfChildCapacities(); + if (getCapacity() < childCap) { + throw new IOException( + "Total of Auto Created leaf queues guaranteed capacity : " + + childCap + " exceeds Parent queue's " + getQueuePath() + + " guaranteed capacity " + getCapacity() + "" + + ".Cannot enforce policy to auto" + + " create queues beyond parent queue's capacity"); + } + } + + leafQueueTemplate = initializeLeafQueueConfigs().build(); + + super.reinitialize(newlyParsedQueue, clusterResource); + + // run reinitialize on each existing queue, to trigger absolute cap + // recomputations + for (CSQueue res : this.getChildQueues()) { + res.reinitialize(res, clusterResource); + } + + //clear state in policy + reinitializeQueueManagementPolicy(); + + //reassign capacities according to policy + final List queueManagementChanges = + queueManagementPolicy.computeQueueManagementChanges(); + + validateAndApplyQueueManagementChanges(queueManagementChanges); + + StringBuffer queueInfo = new StringBuffer(); + queueInfo.append("Reinitialized Managed Parent Queue: ").append(queueName) + .append("]\nwith capacity: [").append(super.getCapacity()).append( + "]\nwith max capacity: [").append(super.getMaximumCapacity()).append( + "]."); + LOG.info(queueInfo.toString()); + } catch (YarnException ye) { + LOG.error("Exception while computing policy changes for leaf queue : " + + getQueueName(), ye); + throw new IOException(ye); + } finally { + writeLock.unlock(); + } } - @Override - protected AutoCreatedLeafQueueTemplate.Builder initializeLeafQueueConfigs( - String queuePath) { + private void initializeQueueManagementPolicy() { + queueManagementPolicy = + csContext.getConfiguration().getAutoCreatedQueueManagementPolicyClass( + getQueuePath()); + + queueManagementPolicy.init(csContext, this); + } + + private void reinitializeQueueManagementPolicy() { + AutoCreatedQueueManagementPolicy managementPolicy = + 
csContext.getConfiguration().getAutoCreatedQueueManagementPolicyClass( + getQueuePath()); + + if (!(managementPolicy.getClass().equals( + this.queueManagementPolicy.getClass()))) { + queueManagementPolicy = managementPolicy; + queueManagementPolicy.init(csContext, this); + } else{ + queueManagementPolicy.reinitialize(csContext, this); + } + } + + protected AutoCreatedLeafQueueConfig.Builder initializeLeafQueueConfigs() { - AutoCreatedLeafQueueTemplate.Builder leafQueueTemplate = - super.initializeLeafQueueConfigs(queuePath); + AutoCreatedLeafQueueConfig.Builder builder = + new AutoCreatedLeafQueueConfig.Builder(); - CapacitySchedulerConfiguration conf = csContext.getConfiguration(); - String leafQueueTemplateConfPrefix = getLeafQueueConfigPrefix(conf); + String leafQueueTemplateConfPrefix = getLeafQueueConfigPrefix( + csContext.getConfiguration()); + //Load template configuration + builder.configuration( + super.initializeLeafQueueConfigs(leafQueueTemplateConfPrefix)); + + //Load template capacities QueueCapacities queueCapacities = new QueueCapacities(false); - CSQueueUtils.loadUpdateAndCheckCapacities(leafQueueTemplateConfPrefix, + CSQueueUtils.loadUpdateAndCheckCapacities(csContext.getConfiguration() + .getAutoCreatedQueueTemplateConfPrefix(getQueuePath()), csContext.getConfiguration(), queueCapacities, getQueueCapacities()); - leafQueueTemplate.capacities(queueCapacities); - - shouldFailAutoCreationWhenGuaranteedCapacityExceeded = - conf.getShouldFailAutoQueueCreationWhenGuaranteedCapacityExceeded( - getQueuePath()); + builder.capacities(queueCapacities); - return leafQueueTemplate; + return builder; } protected void validate(final CSQueue newlyParsedQueue) throws IOException { @@ -106,7 +187,7 @@ protected void validate(final CSQueue newlyParsedQueue) throws IOException { @Override public void addChildQueue(CSQueue childQueue) - throws SchedulerDynamicEditException { + throws SchedulerDynamicEditException, IOException { try { writeLock.lock(); @@ -138,21 
+219,164 @@ public void addChildQueue(CSQueue childQueue) throw new SchedulerDynamicEditException( "Cannot auto create leaf queue " + leafQueueName + ". Child " + "queues capacities have reached parent queue : " - + parentQueue.getQueuePath() + " guaranteed capacity"); + + parentQueue.getQueuePath() + "'s guaranteed capacity"); } } AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) childQueue; super.addChildQueue(leafQueue); - //TODO - refresh policy queue after capacity management is added + final AutoCreatedLeafQueueConfig initialLeafQueueTemplate = + queueManagementPolicy.getInitialLeafQueueConfiguration(leafQueue); + leafQueue.reinitializeFromTemplate(initialLeafQueueTemplate); } finally { writeLock.unlock(); } } - private String getLeafQueueConfigPrefix(CapacitySchedulerConfiguration conf) { - return conf.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()); + public List getScheduleableApplications() { + try { + readLock.lock(); + List apps = new ArrayList<>(); + for (CSQueue childQueue : getChildQueues()) { + apps.addAll(((LeafQueue) childQueue).getApplications()); + } + return Collections.unmodifiableList(apps); + } finally { + readLock.unlock(); + } } + public List getPendingApplications() { + try { + readLock.lock(); + List apps = new ArrayList<>(); + for (CSQueue childQueue : getChildQueues()) { + apps.addAll(((LeafQueue) childQueue).getPendingApplications()); + } + return Collections.unmodifiableList(apps); + } finally { + readLock.unlock(); + } + } + + public List getAllApplications() { + try { + readLock.lock(); + List apps = new ArrayList<>(); + for (CSQueue childQueue : getChildQueues()) { + apps.addAll(((LeafQueue) childQueue).getAllApplications()); + } + return Collections.unmodifiableList(apps); + } finally { + readLock.unlock(); + } + } + + public String getLeafQueueConfigPrefix(CapacitySchedulerConfiguration conf) { + return CapacitySchedulerConfiguration.PREFIX + conf + .getAutoCreatedQueueTemplateConfPrefix(getQueuePath()); + } + + 
public boolean shouldFailAutoCreationWhenGuaranteedCapacityExceeded() { + return shouldFailAutoCreationWhenGuaranteedCapacityExceeded; + } + + /** + * Asynchronously called from scheduler to apply queue management changes + * + * @param queueManagementChanges + */ + public void validateAndApplyQueueManagementChanges( + List queueManagementChanges) + throws IOException, SchedulerDynamicEditException { + try { + writeLock.lock(); + + validateQueueManagementChanges(queueManagementChanges); + + applyQueueManagementChanges(queueManagementChanges); + + AutoCreatedQueueManagementPolicy policy = + getAutoCreatedQueueManagementPolicy(); + + //acquires write lock on policy + policy.commitQueueManagementChanges(queueManagementChanges); + + } finally { + writeLock.unlock(); + } + } + + public void validateQueueManagementChanges( + List queueManagementChanges) + throws SchedulerDynamicEditException { + + for (QueueManagementChange queueManagementChange : queueManagementChanges) { + + CSQueue childQueue = queueManagementChange.getQueue(); + + if (!(childQueue instanceof AutoCreatedLeafQueue)) { + throw new SchedulerDynamicEditException( + "queue should be " + "AutoCreatedLeafQueue. Found " + childQueue + .getClass()); + } + + if (!(AbstractManagedParentQueue.class. + isAssignableFrom(childQueue.getParent().getClass()))) { + LOG.error("Queue " + getQueueName() + + " is not an instance of PlanQueue or ManagedParentQueue." + " " + + "Ignoring update " + queueManagementChanges); + throw new SchedulerDynamicEditException( + "Queue " + getQueueName() + " is not a AutoEnabledParentQueue." 
+ + " Ignoring update " + queueManagementChanges); + } + + switch (queueManagementChange.getQueueAction()){ + case UPDATE_QUEUE: + AutoCreatedLeafQueueConfig template = + queueManagementChange.getUpdatedQueueTemplate(); + ((AutoCreatedLeafQueue) childQueue).validateConfigurations(template); + break; + } + } + } + + private void applyQueueManagementChanges( + List queueManagementChanges) + throws SchedulerDynamicEditException, IOException { + for (QueueManagementChange queueManagementChange : queueManagementChanges) { + switch (queueManagementChange.getQueueAction()){ + case UPDATE_QUEUE: + AutoCreatedLeafQueue childQueueToBeUpdated = + (AutoCreatedLeafQueue) queueManagementChange.getQueue(); + //acquires write lock on leaf queue + childQueueToBeUpdated.reinitializeFromTemplate( + queueManagementChange.getUpdatedQueueTemplate()); + break; + } + } + } + + public CapacitySchedulerConfiguration getLeafQueueConfigs( + String leafQueueName) { + return getLeafQueueConfigs(getLeafQueueTemplate().getLeafQueueConfigs(), + leafQueueName); + } + + public CapacitySchedulerConfiguration getLeafQueueConfigs( + CapacitySchedulerConfiguration templateConfig, String leafQueueName) { + CapacitySchedulerConfiguration leafQueueConfigTemplate = new + CapacitySchedulerConfiguration(new Configuration(false), false); + for (final Iterator> iterator = + templateConfig.iterator(); iterator.hasNext(); ) { + Map.Entry confKeyValuePair = iterator.next(); + final String name = confKeyValuePair.getKey().replaceFirst( + CapacitySchedulerConfiguration + .AUTO_CREATED_LEAF_QUEUE_TEMPLATE_PREFIX, + leafQueueName); + leafQueueConfigTemplate.set(name, confKeyValuePair.getValue()); + } + return leafQueueConfigTemplate; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java index b7f8aa6996b..757002f3d78 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java @@ -22,6 +22,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,61 +36,132 @@ private static final Logger LOG = LoggerFactory.getLogger(PlanQueue.class); + private int maxAppsForReservation; + private int maxAppsPerUserForReservation; + private int userLimit; + private float userLimitFactor; + protected CapacitySchedulerContext schedulerContext; private boolean showReservationsAsQueues; public PlanQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { super(cs, queueName, parent, old); - this.leafQueueTemplate = initializeLeafQueueConfigs(getQueuePath()).build(); + + this.schedulerContext = cs; + // Set the reservation queue attributes for the Plan + CapacitySchedulerConfiguration conf = cs.getConfiguration(); + String queuePath = super.getQueuePath(); + int maxAppsForReservation = conf.getMaximumApplicationsPerQueue(queuePath); + showReservationsAsQueues = conf.getShowReservationAsQueues(queuePath); + if (maxAppsForReservation < 0) { + maxAppsForReservation = + (int) (CapacitySchedulerConfiguration. 
+ DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS * super + .getAbsoluteCapacity()); + } + int userLimit = conf.getUserLimit(queuePath); + float userLimitFactor = conf.getUserLimitFactor(queuePath); + int maxAppsPerUserForReservation = + (int) (maxAppsForReservation * (userLimit / 100.0f) * userLimitFactor); + updateQuotas(userLimit, userLimitFactor, maxAppsForReservation, + maxAppsPerUserForReservation); StringBuffer queueInfo = new StringBuffer(); - queueInfo.append("Created Plan Queue: ").append(queueName).append( - "]\nwith capacity: [").append(super.getCapacity()).append( - "]\nwith max capacity: [").append(super.getMaximumCapacity()).append( - "\nwith max apps: [").append(leafQueueTemplate.getMaxApps()).append( - "]\nwith max apps per user: [").append( - leafQueueTemplate.getMaxAppsPerUser()).append("]\nwith user limit: [") - .append(leafQueueTemplate.getUserLimit()).append( - "]\nwith user limit factor: [").append( - leafQueueTemplate.getUserLimitFactor()).append("]."); + queueInfo.append("Created Plan Queue: ").append(queueName) + .append("\nwith capacity: [").append(super.getCapacity()) + .append("]\nwith max capacity: [").append(super.getMaximumCapacity()) + .append("\nwith max reservation apps: [").append(maxAppsForReservation) + .append("]\nwith max reservation apps per user: [") + .append(maxAppsPerUserForReservation).append("]\nwith user limit: [") + .append(userLimit).append("]\nwith user limit factor: [") + .append(userLimitFactor).append("]."); LOG.info(queueInfo.toString()); } @Override - public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource) - throws IOException { - validate(newlyParsedQueue); - super.reinitialize(newlyParsedQueue, clusterResource); - this.leafQueueTemplate = initializeLeafQueueConfigs(getQueuePath()).build(); + public void reinitialize(CSQueue newlyParsedQueue, + Resource clusterResource) throws IOException { + try { + writeLock.lock(); + // Sanity check + if (!(newlyParsedQueue instanceof PlanQueue) || 
!newlyParsedQueue + .getQueuePath().equals(getQueuePath())) { + throw new IOException( + "Trying to reinitialize " + getQueuePath() + " from " + + newlyParsedQueue.getQueuePath()); + } + + PlanQueue newlyParsedParentQueue = (PlanQueue) newlyParsedQueue; + + if (newlyParsedParentQueue.getChildQueues().size() != 1) { + throw new IOException( + "Reservable Queue should not have sub-queues in the" + + "configuration expect the default reservation queue"); + } + + // Set new configs + setupQueueConfigs(clusterResource); + + updateQuotas(newlyParsedParentQueue.userLimit, + newlyParsedParentQueue.userLimitFactor, + newlyParsedParentQueue.maxAppsForReservation, + newlyParsedParentQueue.maxAppsPerUserForReservation); + + // run reinitialize on each existing queue, to trigger absolute cap + // recomputations + for (CSQueue res : this.getChildQueues()) { + res.reinitialize(res, clusterResource); + } + showReservationsAsQueues = + newlyParsedParentQueue.showReservationsAsQueues; + } finally { + writeLock.unlock(); + } } - @Override - protected AutoCreatedLeafQueueTemplate.Builder initializeLeafQueueConfigs - (String queuePath) { - AutoCreatedLeafQueueTemplate.Builder leafQueueTemplate = super - .initializeLeafQueueConfigs - (queuePath); - showReservationsAsQueues = csContext.getConfiguration() - .getShowReservationAsQueues(queuePath); - return leafQueueTemplate; + private void updateQuotas(int userLimit, float userLimitFactor, + int maxAppsForReservation, int maxAppsPerUserForReservation) { + this.userLimit = userLimit; + this.userLimitFactor = userLimitFactor; + this.maxAppsForReservation = maxAppsForReservation; + this.maxAppsPerUserForReservation = maxAppsPerUserForReservation; } - protected void validate(final CSQueue newlyParsedQueue) throws IOException { - // Sanity check - if (!(newlyParsedQueue instanceof PlanQueue) || !newlyParsedQueue - .getQueuePath().equals(getQueuePath())) { - throw new IOException( - "Trying to reinitialize " + getQueuePath() + " from " - + 
newlyParsedQueue.getQueuePath()); - } + /** + * Number of maximum applications for each of the reservations in this Plan. + * + * @return maxAppsForreservation + */ + public int getMaxApplicationsForReservations() { + return maxAppsForReservation; + } - PlanQueue newlyParsedParentQueue = (PlanQueue) newlyParsedQueue; + /** + * Number of maximum applications per user for each of the reservations in + * this Plan. + * + * @return maxAppsPerUserForreservation + */ + public int getMaxApplicationsPerUserForReservation() { + return maxAppsPerUserForReservation; + } - if (newlyParsedParentQueue.getChildQueues().size() != 1) { - throw new IOException( - "Reservable Queue should not have sub-queues in the" - + "configuration expect the default reservation queue"); - } + /** + * User limit value for each of the reservations in this Plan. + * + * @return userLimit + */ + public int getUserLimitForReservation() { + return userLimit; + } + + /** + * User limit factor value for each of the reservations in this Plan. 
+ * + * @return userLimitFactor + */ + public float getUserLimitFactor() { + return userLimitFactor; } /** @@ -98,4 +170,4 @@ protected void validate(final CSQueue newlyParsedQueue) throws IOException { public boolean showReservationsAsQueues() { return showReservationsAsQueues; } -} +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java new file mode 100644 index 00000000000..74d9b23c0b3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.QueueState; + +/** + * Encapsulates Queue entitlement and state updates needed + * for adjusting capacity dynamically + * + */ +@Private +@Unstable +public abstract class QueueManagementChange { + + private final CSQueue queue; + + /** + * Updating the queue may involve entitlement updates + * and/or QueueState changes + * + * QueueAction can potentially be enhanced + * for adding, removing queues for queue management + */ + public enum QueueAction { + UPDATE_QUEUE + } + + private AutoCreatedLeafQueueConfig + queueTemplateUpdate; + + private final QueueAction queueAction; + /** + * Updated Queue state with the new entitlement + */ + private QueueState transitionToQueueState; + + public QueueManagementChange(final CSQueue queue, + final QueueAction queueAction) { + this.queue = queue; + this.queueAction = queueAction; + } + + public QueueManagementChange(final CSQueue queue, + final QueueAction queueAction, QueueState targetQueueState, + final AutoCreatedLeafQueueConfig + queueTemplateUpdates) { + this(queue, queueAction, queueTemplateUpdates); + this.transitionToQueueState = targetQueueState; + } + + public QueueManagementChange(final CSQueue queue, + final QueueAction queueAction, + final AutoCreatedLeafQueueConfig + queueTemplateUpdates) { + this(queue, queueAction); + this.queueTemplateUpdate = queueTemplateUpdates; + } + + public QueueState getTransitionToQueueState() { + return transitionToQueueState; + } + + public CSQueue getQueue() { + return queue; + } + + public AutoCreatedLeafQueueConfig getUpdatedQueueTemplate() { + return queueTemplateUpdate; + } + + public QueueAction getQueueAction() { + return queueAction; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; 
+ if (!(o instanceof QueueManagementChange)) + return false; + + QueueManagementChange that = (QueueManagementChange) o; + + if (queue != null ? !queue.equals(that.queue) : that.queue != null) + return false; + if (queueTemplateUpdate != null ? !queueTemplateUpdate.equals( + that.queueTemplateUpdate) : that.queueTemplateUpdate != null) + return false; + if (queueAction != that.queueAction) + return false; + return transitionToQueueState == that.transitionToQueueState; + } + + @Override + public int hashCode() { + int result = queue != null ? queue.hashCode() : 0; + result = 31 * result + (queueTemplateUpdate != null ? + queueTemplateUpdate.hashCode() : + 0); + result = 31 * result + (queueAction != null ? queueAction.hashCode() : 0); + result = 31 * result + (transitionToQueueState != null ? + transitionToQueueState.hashCode() : + 0); + return result; + } + + @Override + public String toString() { + return "QueueManagementChange{" + "queue=" + queue + + ", updatedEntitlementsByPartition=" + queueTemplateUpdate + + ", queueAction=" + queueAction + ", transitionToQueueState=" + + transitionToQueueState + '}'; + } + + public static class UpdateQueue extends QueueManagementChange { + + public UpdateQueue(final CSQueue queue, QueueState targetQueueState, + final AutoCreatedLeafQueueConfig + queueTemplateUpdate) { + super(queue, QueueAction.UPDATE_QUEUE, targetQueueState, + queueTemplateUpdate); + } + + public UpdateQueue(final CSQueue queue, + final AutoCreatedLeafQueueConfig + queueTemplateUpdate) { + super(queue, QueueAction.UPDATE_QUEUE, queueTemplateUpdate); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java new file mode 100644 index 00000000000..9b0cf7bc93b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java @@ -0,0 +1,272 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.QueueManagementChangeEvent;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Queue Management scheduling policy for managed parent queues which enable
 * auto child queue creation.
 *
 * <p>On every {@link #editSchedule()} invocation the policy re-discovers the
 * {@link ManagedParentQueue}s known to the scheduler, asks each parent's
 * {@link AutoCreatedQueueManagementPolicy} for the changes it wants applied
 * to its leaf queues, and hands non-empty change lists to the scheduler as a
 * {@link QueueManagementChangeEvent}.
 */
public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {

  private static final Log LOG =
      LogFactory.getLog(QueueManagementDynamicEditPolicy.class);

  private Clock clock;

  // Pointer to other RM components
  private RMContext rmContext;
  private ResourceCalculator rc;
  private CapacityScheduler scheduler;
  private RMNodeLabelsManager nlm;

  private long monitoringInterval;

  // Names of all queues that were ManagedParentQueue instances at the time of
  // the most recent initQueues() call.
  private final Set<String> managedParentQueues = new HashSet<>();

  /**
   * Instantiated by CapacitySchedulerConfiguration.
   */
  public QueueManagementDynamicEditPolicy() {
    clock = SystemClock.getInstance();
  }

  /**
   * Creates and immediately initializes the policy.
   *
   * @param context   the resource manager's context
   * @param scheduler the capacity scheduler whose queues are managed
   */
  @VisibleForTesting
  public QueueManagementDynamicEditPolicy(RMContext context,
      CapacityScheduler scheduler) {
    init(context.getYarnConfiguration(), context, scheduler);
  }

  /**
   * Creates and immediately initializes the policy with an explicit clock.
   *
   * @param context   the resource manager's context
   * @param scheduler the capacity scheduler whose queues are managed
   * @param clock     clock used for timing measurements; it replaces the
   *                  scheduler clock that {@code init} installs
   */
  @VisibleForTesting
  public QueueManagementDynamicEditPolicy(RMContext context,
      CapacityScheduler scheduler, Clock clock) {
    init(context.getYarnConfiguration(), context, scheduler);
    // init() installs the scheduler's clock, so the override must come after.
    this.clock = clock;
  }

  /**
   * Wires the policy to the scheduler and reads the monitoring interval from
   * the scheduler's configuration.
   *
   * @param config  configuration (unused; the scheduler's own
   *                {@link CapacitySchedulerConfiguration} is consulted)
   * @param context the resource manager's context
   * @param sched   the scheduler; must be a {@link CapacityScheduler}
   * @throws YarnRuntimeException if {@code sched} is not a
   *                              {@link CapacityScheduler}
   */
  @Override
  public void init(final Configuration config, final RMContext context,
      final ResourceScheduler sched) {
    LOG.info("Queue Management Policy monitor:" + this.
        getClass().getCanonicalName());
    assert null == scheduler : "Unexpected duplicate call to init";
    if (!(sched instanceof CapacityScheduler)) {
      // sched may be null here; avoid an NPE while building the message.
      String actualType =
          (sched == null) ? null : sched.getClass().getCanonicalName();
      throw new YarnRuntimeException("Class "
          + actualType + " not instance of "
          + CapacityScheduler.class.getCanonicalName());
    }
    rmContext = context;
    scheduler = (CapacityScheduler) sched;
    clock = scheduler.getClock();

    rc = scheduler.getResourceCalculator();
    nlm = scheduler.getRMContext().getNodeLabelManager();

    CapacitySchedulerConfiguration csConfig = scheduler.getConfiguration();

    monitoringInterval = csConfig.getLong(
        CapacitySchedulerConfiguration.QUEUE_MANAGEMENT_MONITORING_INTERVAL,
        CapacitySchedulerConfiguration.
            DEFAULT_QUEUE_MANAGEMENT_MONITORING_INTERVAL);

    initQueues();
  }

  /**
   * Reinitializes queues (called on scheduler.reinitialize).
   *
   * @param config  Configuration
   * @param context The resourceManager's context
   * @param sched   The scheduler
   */
  public void reinitialize(final Configuration config, final RMContext context,
      final ResourceScheduler sched) {
    //TODO - Wire with scheduler reinitialize and remove initQueues below?
    initQueues();
  }

  /**
   * Rebuilds {@link #managedParentQueues} from the scheduler's current queue
   * map, keeping only the names of {@link ManagedParentQueue} instances.
   */
  private void initQueues() {
    managedParentQueues.clear();
    for (Map.Entry<String, CSQueue> entry : scheduler
        .getCapacitySchedulerQueueManager()
        .getQueues().entrySet()) {
      if (entry.getValue() instanceof ManagedParentQueue) {
        managedParentQueues.add(entry.getKey());
      }
    }
  }

  /**
   * Runs one round of the policy: refreshes the set of managed parent queues
   * and computes/dispatches queue management changes for each of them.
   */
  @Override
  public void editSchedule() {
    long startTs = clock.getTime();

    initQueues();
    manageAutoCreatedLeafQueues();

    if (LOG.isDebugEnabled()) {
      LOG.debug("Total time used=" + (clock.getTime() - startTs) + " ms.");
    }
  }

  /**
   * Computes (and dispatches) queue management changes for every known
   * managed parent queue.
   *
   * @return the concatenation of all changes computed in this round; empty
   *         when there is nothing to do
   */
  @VisibleForTesting
  List<QueueManagementChange> manageAutoCreatedLeafQueues() {
    List<QueueManagementChange> queueManagementChanges = new ArrayList<>();

    for (String parentQueueName : managedParentQueues) {
      CSQueue queue = scheduler.getCapacitySchedulerQueueManager()
          .getQueue(parentQueueName);
      // The queue may have been removed or replaced since initQueues() ran;
      // skip it instead of failing the whole round with an NPE/CCE.
      if (!(queue instanceof ManagedParentQueue)) {
        LOG.warn("Skipping queue management for " + parentQueueName
            + " since it is no longer a managed parent queue");
        continue;
      }
      queueManagementChanges.addAll(
          computeQueueManagementChanges((ManagedParentQueue) queue));
    }
    return queueManagementChanges;
  }

  /**
   * Asks the parent queue's management policy for its desired changes and, if
   * there are any, dispatches them to the scheduler as a
   * {@link QueueManagementChangeEvent}.
   *
   * <p>Nothing is computed when the parent queue is configured to fail auto
   * creation once its guaranteed capacity is exceeded. A
   * {@link YarnException} raised by the policy is logged and results in an
   * empty list.
   *
   * @param parentQueue the managed parent queue to process
   * @return the changes computed for {@code parentQueue}, possibly empty
   */
  @VisibleForTesting
  List<QueueManagementChange> computeQueueManagementChanges(
      ManagedParentQueue parentQueue) {

    List<QueueManagementChange> queueManagementChanges =
        Collections.emptyList();

    if (parentQueue.shouldFailAutoCreationWhenGuaranteedCapacityExceeded()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(
            "Skipping queue management updates for parent queue "
                + parentQueue
                .getQueuePath() + " "
                + "since configuration for auto creating queue's beyond "
                + "parent's "
                + "guaranteed capacity is disabled");
      }
      return queueManagementChanges;
    }

    AutoCreatedQueueManagementPolicy policy =
        parentQueue.getAutoCreatedQueueManagementPolicy();
    // Only measured (and only read) when debug logging is enabled.
    long startTime = 0;
    try {
      if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat
            .format("Trying to use {0} to compute queue management "
                    + "changes",
                policy.getClass().getName()));
        startTime = clock.getTime();
      }

      queueManagementChanges = policy.computeQueueManagementChanges();

      //Scheduler update is asynchronous
      if (!queueManagementChanges.isEmpty()) {
        QueueManagementChangeEvent queueManagementChangeEvent =
            new QueueManagementChangeEvent(parentQueue,
                queueManagementChanges);
        scheduler.getRMContext().getDispatcher().getEventHandler().handle(
            queueManagementChangeEvent);
      }

      if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("{0} uses {1} millisecond"
                + " to run",
            policy.getClass().getName(), clock.getTime()
                - startTime));
        if (!queueManagementChanges.isEmpty()) {
          LOG.debug(" Updated queue management updates for parent queue"
              + " ["
              + parentQueue.getQueueName() + "] : [\n" + queueManagementChanges
              .toString() + "\n]");
        }
      }
    } catch (YarnException e) {
      LOG.error(
          "Could not compute child queue management updates for parent "
              + "queue "
              + parentQueue.getQueueName(), e);
    }
    return queueManagementChanges;
  }

  @Override
  public long getMonitoringInterval() {
    return monitoringInterval;
  }

  @Override
  public String getPolicyName() {
    return "QueueManagementDynamicEditPolicy";
  }

  public ResourceCalculator getResourceCalculator() {
    return rc;
  }

  public RMContext getRmContext() {
    return rmContext;
  }

  /**
   * Alias of {@link #getResourceCalculator()}.
   *
   * @return the scheduler's resource calculator
   */
  public ResourceCalculator getRC() {
    return getResourceCalculator();
  }

  public CapacityScheduler getScheduler() {
    return scheduler;
  }

  /**
   * Returns the live, mutable set of managed parent queue names.
   *
   * @return names of the managed parent queues found by the last refresh
   */
  public Set<String> getManagedParentQueues() {
    return managedParentQueues;
  }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ReservationQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ReservationQueue.java new file mode 100644 index 00000000000..34f4aa151ab --- /dev/null +++
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ReservationQueue.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import java.io.IOException; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This represents a dynamic {@link LeafQueue} managed by the + * {@link ReservationSystem} + * + */ +public class ReservationQueue extends AbstractAutoCreatedLeafQueue { + + private static final Logger LOG = LoggerFactory + .getLogger(ReservationQueue.class); + + private PlanQueue parent; + + public ReservationQueue(CapacitySchedulerContext cs, String queueName, + PlanQueue parent) throws IOException { + super(cs, queueName, parent, null); + // the following parameters are common to all reservation in the plan + updateQuotas(parent.getUserLimitForReservation(), + parent.getUserLimitFactor(), + parent.getMaxApplicationsForReservations(), + parent.getMaxApplicationsPerUserForReservation()); + this.parent = parent; + } + + @Override + public void reinitialize(CSQueue newlyParsedQueue, + Resource clusterResource) throws IOException { + try { + writeLock.lock(); + // Sanity check + if (!(newlyParsedQueue instanceof ReservationQueue) || !newlyParsedQueue + .getQueuePath().equals(getQueuePath())) { + throw new IOException( + "Trying to reinitialize " + getQueuePath() + " from " + + newlyParsedQueue.getQueuePath()); + } + super.reinitialize(newlyParsedQueue, clusterResource); + CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, + this, labelManager, null); + + updateQuotas(parent.getUserLimitForReservation(), + parent.getUserLimitFactor(), + parent.getMaxApplicationsForReservations(), + parent.getMaxApplicationsPerUserForReservation()); + } finally { + 
writeLock.unlock(); + } + } + + private void updateQuotas(int userLimit, float userLimitFactor, + int maxAppsForReservation, int maxAppsPerUserForReservation) { + setUserLimit(userLimit); + setUserLimitFactor(userLimitFactor); + setMaxApplications(maxAppsForReservation); + maxApplicationsPerUser = maxAppsPerUserForReservation; + } + + @Override + protected void setupConfigurableCapacities(CapacitySchedulerConfiguration + configuration) { + super.setupConfigurableCapacities(queueCapacities); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java new file mode 100644 index 00000000000..aee6405dd7e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java @@ -0,0 +1,745 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .queuemanagement; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .SchedulerDynamicEditException; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .AbstractAutoCreatedLeafQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .AutoCreatedLeafQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .AutoCreatedLeafQueueConfig; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .AutoCreatedQueueManagementPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .CapacitySchedulerContext; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .LeafQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .ManagedParentQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .ParentQueue; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .QueueCapacities; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .QueueManagementChange; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica + .FiCaSchedulerApp; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.MonotonicClock; + 
+import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import static org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager + .NO_LABEL; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler + .capacity.CSQueueUtils.EPSILON; + +/** + * Capacity Management policy for auto created leaf queues + *

+ * Assigns capacity if available to leaf queues based on application + * submission order i.e leaf queues are assigned capacity in FCFS order based + * on application submission time. Updates leaf queue capacities to 0 when + * there are no pending or running apps under that queue. + */ +public class GuaranteedOrZeroCapacityOverTimePolicy + implements AutoCreatedQueueManagementPolicy { + + private CapacitySchedulerContext scheduler; + private ManagedParentQueue managedParentQueue; + + private static final Log LOG = LogFactory.getLog( + GuaranteedOrZeroCapacityOverTimePolicy.class); + + private AutoCreatedLeafQueueConfig ZERO_CAPACITY_ENTITLEMENT; + + private ReentrantReadWriteLock.WriteLock writeLock; + + private ReentrantReadWriteLock.ReadLock readLock; + + private ParentQueueState parentQueueState = new ParentQueueState(); + + private AutoCreatedLeafQueueConfig leafQueueTemplate; + + private QueueCapacities leafQueueTemplateCapacities; + + private Map leafQueueStateMap = new HashMap<>(); + + private Clock clock = new MonotonicClock(); + + private class LeafQueueState { + + private AtomicBoolean isActive = new AtomicBoolean(false); + + private long mostRecentActivationTime; + + private long mostRecentDeactivationTime; + + public long getMostRecentActivationTime() { + return mostRecentActivationTime; + } + + public long getMostRecentDeactivationTime() { + return mostRecentDeactivationTime; + } + + /** + * Is the queue currently active or deactivated? 
+ * + * @return true if Active else false + */ + public boolean isActive() { + return isActive.get(); + } + + private boolean activate() { + boolean ret = isActive.compareAndSet(false, true); + mostRecentActivationTime = clock.getTime(); + return ret; + } + + private boolean deactivate() { + boolean ret = isActive.compareAndSet(true, false); + mostRecentDeactivationTime = clock.getTime(); + return ret; + } + } + + private boolean containsLeafQueue(String leafQueueName) { + return leafQueueStateMap.containsKey(leafQueueName); + } + + private boolean addLeafQueueStateIfNotExists(String leafQueueName, + LeafQueueState leafQueueState) { + if (!containsLeafQueue(leafQueueName)) { + leafQueueStateMap.put(leafQueueName, leafQueueState); + return true; + } + return false; + } + + private boolean addLeafQueueStateIfNotExists(LeafQueue leafQueue) { + return addLeafQueueStateIfNotExists(leafQueue.getQueueName(), + new LeafQueueState()); + } + + private void clearLeafQueueState() { + leafQueueStateMap.clear(); + } + + private class ParentQueueState { + + private Map totalAbsoluteActivatedChildQueueCapacityByLabel = + new HashMap(); + + private float getAbsoluteActivatedChildQueueCapacity() { + return getAbsoluteActivatedChildQueueCapacity(NO_LABEL); + } + + private float getAbsoluteActivatedChildQueueCapacity(String nodeLabel) { + try { + readLock.lock(); + Float totalActivatedCapacity = getByLabel(nodeLabel); + if (totalActivatedCapacity != null) { + return totalActivatedCapacity; + } else{ + return 0; + } + } finally { + readLock.unlock(); + } + } + + private void incAbsoluteActivatedChildCapacity(String nodeLabel, + float childQueueCapacity) { + try { + writeLock.lock(); + Float activatedChildCapacity = getByLabel(nodeLabel); + if (activatedChildCapacity != null) { + setByLabel(nodeLabel, activatedChildCapacity + childQueueCapacity); + } else{ + setByLabel(nodeLabel, childQueueCapacity); + } + } finally { + writeLock.unlock(); + } + } + + private void 
decAbsoluteActivatedChildCapacity(String nodeLabel, + float childQueueCapacity) { + try { + writeLock.lock(); + Float activatedChildCapacity = getByLabel(nodeLabel); + if (activatedChildCapacity != null) { + setByLabel(nodeLabel, activatedChildCapacity - childQueueCapacity); + } else{ + setByLabel(nodeLabel, childQueueCapacity); + } + } finally { + writeLock.unlock(); + } + } + + Float getByLabel(String label) { + return totalAbsoluteActivatedChildQueueCapacityByLabel.get(label); + } + + Float setByLabel(String label, float val) { + return totalAbsoluteActivatedChildQueueCapacityByLabel.put(label, val); + } + + void clear() { + totalAbsoluteActivatedChildQueueCapacityByLabel.clear(); + } + } + + /** + * Comparator that orders applications by their submit time + */ + private class PendingApplicationComparator + implements Comparator { + + @Override + public int compare(FiCaSchedulerApp app1, FiCaSchedulerApp app2) { + RMApp rmApp1 = scheduler.getRMContext().getRMApps().get( + app1.getApplicationId()); + RMApp rmApp2 = scheduler.getRMContext().getRMApps().get( + app2.getApplicationId()); + if (rmApp1 != null && rmApp2 != null) { + return Long.compare(rmApp1.getSubmitTime(), rmApp2.getSubmitTime()); + } else if (rmApp1 != null) { + return -1; + } else if (rmApp2 != null) { + return 1; + } else{ + return 0; + } + } + } + + private PendingApplicationComparator applicationComparator = + new PendingApplicationComparator(); + + @Override + public void init(final CapacitySchedulerContext schedulerContext, + final ParentQueue parentQueue) { + this.scheduler = schedulerContext; + + ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + readLock = lock.readLock(); + writeLock = lock.writeLock(); + + if (!(parentQueue instanceof ManagedParentQueue)) { + throw new IllegalArgumentException( + "Expected instance of type " + ManagedParentQueue.class); + } + + this.managedParentQueue = (ManagedParentQueue) parentQueue; + + initializeLeafQueueTemplate(this.managedParentQueue); 
+ + LOG.info( + "Initialized queue management policy for parent queue " + parentQueue + .getQueueName() + " with leaf queue template capacities : [" + + leafQueueTemplate.getQueueCapacities() + "]"); + } + + private void initializeLeafQueueTemplate(ManagedParentQueue parentQueue) { + leafQueueTemplate = parentQueue.getLeafQueueTemplate(); + + leafQueueTemplateCapacities = leafQueueTemplate.getQueueCapacities(); + + ZERO_CAPACITY_ENTITLEMENT = buildTemplate(0.0f, + leafQueueTemplateCapacities.getMaximumCapacity()); + } + + @Override + public List computeQueueManagementChanges() + throws SchedulerDynamicEditException { + + //TODO : Add support for node labels on leaf queue template configurations + //synch/add missing leaf queue(s) if any to state + updateLeafQueueState(); + + try { + readLock.lock(); + List queueManagementChanges = new ArrayList<>(); + + // check if any leaf queues need to be deactivated based on pending + // applications and + float parentAbsoluteCapacity = + managedParentQueue.getQueueCapacities().getAbsoluteCapacity(); + + float leafQueueTemplateAbsoluteCapacity = + leafQueueTemplateCapacities.getAbsoluteCapacity(); + Map deactivatedLeafQueues = + deactivateLeafQueuesIfInActive(managedParentQueue, queueManagementChanges); + + float deactivatedCapacity = getTotalDeactivatedCapacity( + deactivatedLeafQueues); + + float sumOfChildQueueActivatedCapacity = parentQueueState. + getAbsoluteActivatedChildQueueCapacity(); + + //Check if we need to activate anything at all? 
+ float availableCapacity = getAvailableCapacity(parentAbsoluteCapacity, + deactivatedCapacity, sumOfChildQueueActivatedCapacity); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Parent queue : " + managedParentQueue.getQueueName() + " absCapacity = " + + parentAbsoluteCapacity + ", leafQueueAbsoluteCapacity = " + + leafQueueTemplateAbsoluteCapacity + ", deactivatedCapacity = " + + deactivatedCapacity + " , absChildActivatedCapacity = " + + sumOfChildQueueActivatedCapacity + ", availableCapacity = " + + availableCapacity); + } + + if (availableCapacity >= leafQueueTemplateAbsoluteCapacity) { + //sort applications across leaf queues by submit time + List pendingApps = getSortedPendingApplications(); + + if (pendingApps.size() > 0) { + int maxLeafQueuesTobeActivated = getMaxLeavesToBeActivated( + availableCapacity, leafQueueTemplateAbsoluteCapacity, + pendingApps.size()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Found " + maxLeafQueuesTobeActivated + + " leaf queues to be activated with " + pendingApps.size() + + " apps "); + } + + LinkedHashSet leafQueuesToBeActivated = getSortedLeafQueues( + pendingApps, maxLeafQueuesTobeActivated, + deactivatedLeafQueues.keySet()); + + //Compute entitlement changes for the identified leaf queues + // which is appended to the List of queueManagementChanges + computeQueueManagementChanges(leafQueuesToBeActivated, + queueManagementChanges, availableCapacity, + leafQueueTemplateAbsoluteCapacity); + + if (LOG.isDebugEnabled()) { + if (leafQueuesToBeActivated.size() > 0) { + LOG.debug( + "Activated leaf queues : [" + leafQueuesToBeActivated + "]"); + } + } + } + } + return queueManagementChanges; + } finally { + readLock.unlock(); + } + } + + private float getTotalDeactivatedCapacity( + Map deactivatedLeafQueues) { + float deactivatedCapacity = 0; + for (Iterator> iterator = + deactivatedLeafQueues.entrySet().iterator(); iterator.hasNext(); ) { + Map.Entry deactivatedQueueCapacity = + iterator.next(); + deactivatedCapacity += + 
deactivatedQueueCapacity.getValue().getAbsoluteCapacity(); + } + return deactivatedCapacity; + } + + @VisibleForTesting + void updateLeafQueueState() { + try { + writeLock.lock(); + Set newQueues = new HashSet<>(); + for (CSQueue newQueue : managedParentQueue.getChildQueues()) { + if (newQueue instanceof LeafQueue) { + addLeafQueueStateIfNotExists((LeafQueue) newQueue); + newQueues.add(newQueue.getQueueName()); + } + } + + for (Iterator> itr = + leafQueueStateMap.entrySet().iterator(); itr.hasNext(); ) { + Map.Entry e = itr.next(); + String queueName = e.getKey(); + if (!newQueues.contains(queueName)) { + itr.remove(); + } + } + } finally { + writeLock.unlock(); + } + } + + private LinkedHashSet getSortedLeafQueues( + final List pendingApps, int leafQueuesNeeded, + Set deactivatedQueues) throws SchedulerDynamicEditException { + + LinkedHashSet leafQueues = new LinkedHashSet<>(leafQueuesNeeded); + int ctr = 0; + for (FiCaSchedulerApp app : pendingApps) { + + AutoCreatedLeafQueue leafQueue = + (AutoCreatedLeafQueue) app.getCSLeafQueue(); + String leafQueueName = leafQueue.getQueueName(); + + //Check if leafQueue is not active already and has any pending apps + if (ctr < leafQueuesNeeded) { + + if (!isActive(leafQueue)) { + if (!deactivatedQueues.contains(leafQueueName)) { + if (addLeafQueueIfNotExists(leafQueues, leafQueueName)) { + ctr++; + } + } + } + } else{ + break; + } + } + return leafQueues; + } + + private boolean addLeafQueueIfNotExists(Set leafQueues, + String leafQueueName) { + boolean ret = false; + if (!leafQueues.contains(leafQueueName)) { + ret = leafQueues.add(leafQueueName); + } + return ret; + } + + @VisibleForTesting + public boolean isActive(final AutoCreatedLeafQueue leafQueue) + throws SchedulerDynamicEditException { + try { + readLock.lock(); + LeafQueueState leafQueueStatus = getLeafQueueState(leafQueue); + return leafQueueStatus.isActive(); + } finally { + readLock.unlock(); + } + } + + private Map deactivateLeafQueuesIfInActive( + 
ParentQueue parentQueue, + List queueManagementChanges) + throws SchedulerDynamicEditException { + Map deactivatedQueues = new HashMap<>(); + + for (CSQueue childQueue : parentQueue.getChildQueues()) { + AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) childQueue; + + if (isActive(leafQueue) && !hasPendingApps(leafQueue)) { + queueManagementChanges.add( + new QueueManagementChange.UpdateQueue(leafQueue, + ZERO_CAPACITY_ENTITLEMENT)); + deactivatedQueues.put(leafQueue.getQueueName(), + leafQueueTemplateCapacities); + } else{ + if (LOG.isDebugEnabled()) { + LOG.debug(" Leaf queue has pending applications : " + leafQueue + .getNumApplications() + ".Skipping deactivation for " + + leafQueue); + } + } + } + + if (LOG.isDebugEnabled()) { + if (deactivatedQueues.size() > 0) { + LOG.debug("Deactivated leaf queues : " + deactivatedQueues); + } + } + return deactivatedQueues; + } + + private void computeQueueManagementChanges( + Set leafQueuesToBeActivated, + List queueManagementChanges, + final float availableCapacity, + final float leafQueueTemplateAbsoluteCapacity) { + + float curAvailableCapacity = availableCapacity; + + for (String curLeafQueue : leafQueuesToBeActivated) { + // Activate queues if capacity is available + if (curAvailableCapacity >= leafQueueTemplateAbsoluteCapacity) { + AutoCreatedLeafQueue leafQueue = + (AutoCreatedLeafQueue) scheduler.getCapacitySchedulerQueueManager() + .getQueue(curLeafQueue); + if (leafQueue != null) { + AutoCreatedLeafQueueConfig newTemplate = buildTemplate( + leafQueueTemplateCapacities.getCapacity(), + leafQueueTemplateCapacities.getMaximumCapacity()); + queueManagementChanges.add( + new QueueManagementChange.UpdateQueue(leafQueue, newTemplate)); + curAvailableCapacity -= leafQueueTemplateAbsoluteCapacity; + } else{ + LOG.warn( + "Could not find queue in scheduler while trying to deactivate " + + curLeafQueue); + } + } + } + } + + @VisibleForTesting + public int getMaxLeavesToBeActivated(float availableCapacity, + float 
childQueueAbsoluteCapacity, int numPendingApps) + throws SchedulerDynamicEditException { + + if (childQueueAbsoluteCapacity > 0) { + int numLeafQueuesNeeded = (int) Math.floor( + availableCapacity / childQueueAbsoluteCapacity); + + return Math.min(numLeafQueuesNeeded, numPendingApps); + } else{ + throw new SchedulerDynamicEditException("Child queue absolute capacity " + + "is initialized to 0. Check parent queue's " + managedParentQueue + .getQueueName() + " leaf queue template configuration"); + } + } + + private float getAvailableCapacity(float parentAbsCapacity, + float deactivatedAbsCapacity, float totalChildQueueActivatedCapacity) { + return parentAbsCapacity - totalChildQueueActivatedCapacity + + deactivatedAbsCapacity + EPSILON; + } + + /** + * Commit queue management changes - which involves updating required state + * on parent/underlying leaf queues + * + * @param queueManagementChanges Queue Management changes to commit + * @throws SchedulerDynamicEditException when validation fails + */ + @Override + public void commitQueueManagementChanges( + List queueManagementChanges) + throws SchedulerDynamicEditException { + try { + writeLock.lock(); + for (QueueManagementChange queueManagementChange : + queueManagementChanges) { + AutoCreatedLeafQueueConfig updatedQueueTemplate = + queueManagementChange.getUpdatedQueueTemplate(); + CSQueue queue = queueManagementChange.getQueue(); + if (!(queue instanceof AutoCreatedLeafQueue)) { + throw new SchedulerDynamicEditException( + "Expected queue management change for AutoCreatedLeafQueue. " + + "Found " + queue.getClass().getName()); + } + + AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) queue; + + if (updatedQueueTemplate.getQueueCapacities().getCapacity() > 0) { + if (isActive(leafQueue)) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Queue is already active. 
Skipping activation : " + queue + .getQueuePath()); + } + } else{ + activate(leafQueue); + } + } else{ + if (!isActive(leafQueue)) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Queue is already de-activated. " + "Skipping de-activation " + + ": " + leafQueue.getQueuePath()); + } + } else{ + deactivate(leafQueue); + } + } + } + } finally { + writeLock.unlock(); + } + } + + private void activate(final AutoCreatedLeafQueue leafQueue) + throws SchedulerDynamicEditException { + try { + writeLock.lock(); + getLeafQueueState(leafQueue).activate(); + + parentQueueState.incAbsoluteActivatedChildCapacity(NO_LABEL, + leafQueueTemplateCapacities.getAbsoluteCapacity()); + } finally { + writeLock.unlock(); + } + } + + private void deactivate(final AutoCreatedLeafQueue leafQueue) + throws SchedulerDynamicEditException { + try { + writeLock.lock(); + getLeafQueueState(leafQueue).deactivate(); + + for (String nodeLabel : managedParentQueue.getQueueCapacities() + .getExistingNodeLabels()) { + parentQueueState.decAbsoluteActivatedChildCapacity(nodeLabel, + leafQueueTemplateCapacities.getAbsoluteCapacity()); + } + } finally { + writeLock.unlock(); + } + } + + public boolean hasPendingApps(final AutoCreatedLeafQueue leafQueue) { + return leafQueue.getNumApplications() > 0; + } + + @Override + public void reinitialize(CapacitySchedulerContext schedulerContext, + final ParentQueue parentQueue) { + if (!(parentQueue instanceof ManagedParentQueue)) { + throw new IllegalStateException( + "Expected instance of type " + ManagedParentQueue.class + " found " + + " : " + parentQueue.getClass()); + } + + if (this.managedParentQueue != null && !parentQueue.getQueuePath().equals( + this.managedParentQueue.getQueuePath())) { + throw new IllegalStateException( + "Expected parent queue path to match " + this.managedParentQueue + .getQueuePath() + " found : " + parentQueue.getQueuePath()); + } + + this.managedParentQueue = (ManagedParentQueue) parentQueue; + + 
initializeLeafQueueTemplate(this.managedParentQueue);
+
+    //clear state
+    parentQueueState.clear();
+    clearLeafQueueState();
+
+    LOG.info(
+        "Reinitialized queue management policy for parent queue "
+            + parentQueue.getQueueName() +" with leaf queue template "
+            + "capacities : ["
+            + leafQueueTemplate.getQueueCapacities() + "]");
+  }
+
+  /**
+   * Registers state for a newly created leaf queue and computes its initial
+   * configuration. The queue is activated and given the leaf queue template's
+   * capacity and maximum capacity only if the parent's available capacity
+   * is at least the template's absolute capacity; otherwise
+   * ZERO_CAPACITY_ENTITLEMENT is returned.
+   *
+   * @param leafQueue the leaf queue, which must be an AutoCreatedLeafQueue
+   * @return the initial configuration for the leaf queue
+   * @throws SchedulerDynamicEditException if the queue is not an
+   *         AutoCreatedLeafQueue or already has state in this policy
+   */
+  @Override
+  public AutoCreatedLeafQueueConfig getInitialLeafQueueConfiguration(
+      AbstractAutoCreatedLeafQueue leafQueue)
+      throws SchedulerDynamicEditException {
+
+    if (!(leafQueue instanceof AutoCreatedLeafQueue)) {
+      throw new SchedulerDynamicEditException("Not an instance of "
+          + "AutoCreatedLeafQueue : " + leafQueue.getClass());
+    }
+
+    AutoCreatedLeafQueue autoCreatedLeafQueue =
+        (AutoCreatedLeafQueue) leafQueue;
+    AutoCreatedLeafQueueConfig template = ZERO_CAPACITY_ENTITLEMENT;
+    // Acquire the lock before entering try, so that the finally block never
+    // calls unlock() on a lock that was not acquired.
+    writeLock.lock();
+    try {
+      if (!addLeafQueueStateIfNotExists(leafQueue)) {
+        LOG.error("Leaf queue already exists in state : " + getLeafQueueState(
+            leafQueue));
+        throw new SchedulerDynamicEditException(
+            "Leaf queue already exists in state : " + getLeafQueueState(
+                leafQueue));
+      }
+
+      float availableCapacity = getAvailableCapacity(
+          managedParentQueue.getQueueCapacities().getAbsoluteCapacity(), 0,
+          parentQueueState.getAbsoluteActivatedChildQueueCapacity());
+
+      if (availableCapacity >= leafQueueTemplateCapacities
+          .getAbsoluteCapacity()) {
+        activate(autoCreatedLeafQueue);
+        template = buildTemplate(leafQueueTemplateCapacities.getCapacity(),
+            leafQueueTemplateCapacities.getMaximumCapacity());
+      }
+    } finally {
+      writeLock.unlock();
+    }
+    return template;
+  }
+
+  /**
+   * Looks up the state tracked for a leaf queue, under the read lock.
+   *
+   * @param queue leaf queue to look up by name
+   * @return the state stored for the queue's name
+   * @throws SchedulerDynamicEditException if no state exists for the queue
+   */
+  @VisibleForTesting
+  LeafQueueState getLeafQueueState(LeafQueue queue)
+      throws SchedulerDynamicEditException {
+    // Acquire the lock before entering try, so that the finally block never
+    // calls unlock() on a lock that was not acquired.
+    readLock.lock();
+    try {
+      String queueName = queue.getQueueName();
+      if (!containsLeafQueue(queueName)) {
+        throw new SchedulerDynamicEditException(
+            "Could not find leaf queue in " + "state " + queueName);
+      } else {
+        return leafQueueStateMap.get(queueName);
+      }
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  /**
+   * @return the absolute activated child queue capacity recorded in the
+   *         parent queue state
+   */
+  @VisibleForTesting
+  public float getAbsoluteActivatedChildQueueCapacity() {
+    return parentQueueState.getAbsoluteActivatedChildQueueCapacity();
+  }
+
+  /**
+   * @return a new list holding the managed parent queue's applications,
+   *         sorted with applicationComparator
+   */
+  private List getSortedPendingApplications() {
+    List apps = new ArrayList<>(
+        managedParentQueue.getAllApplications());
+    Collections.sort(apps, applicationComparator);
+    return apps;
+  }
+
+  /**
+   * Builds a leaf queue configuration in which every existing node label of
+   * the managed parent queue gets the given capacity and maximum capacity.
+   *
+   * @param capacity capacity to set for each node label
+   * @param maxCapacity maximum capacity to set for each node label
+   * @return the resulting leaf queue configuration
+   */
+  private AutoCreatedLeafQueueConfig buildTemplate(float capacity,
+      float maxCapacity) {
+    AutoCreatedLeafQueueConfig.Builder templateBuilder =
+        new AutoCreatedLeafQueueConfig.Builder();
+
+    QueueCapacities capacities = new QueueCapacities(false);
+    templateBuilder.capacities(capacities);
+
+    for (String nodeLabel : managedParentQueue.getQueueCapacities()
+        .getExistingNodeLabels()) {
+      capacities.setCapacity(nodeLabel, capacity);
+      capacities.setMaximumCapacity(nodeLabel, maxCapacity);
+    }
+
+    return new AutoCreatedLeafQueueConfig(templateBuilder);
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/QueueEntitlement.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/QueueEntitlement.java
index 2a751e3e437..f4182f3101f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/QueueEntitlement.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/QueueEntitlement.java
@@ -43,4 +43,40 @@ public float getCapacity() {
   public void setCapacity(float capacity) {
     this.capacity = capacity;
   }
+
+  /**
+   * Two entitlements are equal when both their capacity and their maximum
+   * capacity compare equal under {@link Float#compare(float, float)}.
+   *
+   * @param o object to compare against
+   * @return true if {@code o} is a QueueEntitlement with the same capacity
+   *         and maximum capacity
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof QueueEntitlement)) {
+      return false;
+    }
+
+    QueueEntitlement that = (QueueEntitlement) o;
+    return Float.compare(that.capacity, capacity) == 0
+        && Float.compare(that.maxCapacity, maxCapacity) == 0;
+  }
+
+  /**
+   * Hash code derived from the same two fields that {@link #equals(Object)}
+   * compares.
+   *
+   * @return hash of capacity and maximum capacity
+   */
+  @Override
+  public int hashCode() {
+    int result = (capacity != +0.0f ? Float.floatToIntBits(capacity) : 0);
+    result = 31 * result + (maxCapacity != +0.0f ? Float.floatToIntBits(
+        maxCapacity) : 0);
+    return result;
+  }
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/QueueManagementChangeEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/QueueManagementChangeEvent.java
new file mode 100644
index 00000000000..926e1be6668
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/QueueManagementChangeEvent.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.event;
+
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
+    .ParentQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
+    .QueueManagementChange;
+
+import java.util.List;
+
+/**
+ * Scheduler event of type {@link SchedulerEventType#MANAGE_QUEUE} that
+ * carries a parent queue together with a list of queue management changes.
+ */
+public class QueueManagementChangeEvent extends SchedulerEvent {
+
+  // Both fields are assigned once in the constructor and only read afterwards.
+  private final ParentQueue parentQueue;
+  private final List<QueueManagementChange> queueManagementChanges;
+
+  /**
+   * @param parentQueue parent queue associated with the changes
+   * @param queueManagementChanges queue management changes carried by
+   *                               this event
+   */
+  public QueueManagementChangeEvent(ParentQueue parentQueue,
+      List<QueueManagementChange> queueManagementChanges) {
+    super(SchedulerEventType.MANAGE_QUEUE);
+    this.parentQueue = parentQueue;
+    this.queueManagementChanges = queueManagementChanges;
+  }
+
+  /**
+   * @return the parent queue passed to the constructor
+   */
+  public ParentQueue getParentQueue() {
+    return parentQueue;
+  }
+
+  /**
+   * @return the queue management changes passed to the constructor
+   */
+  public List<QueueManagementChange> getQueueManagementChanges() {
+    return queueManagementChanges;
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java
index 229e0bbc0be..b107cf4ee61 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java
@@ -51,5 +51,8 @@
   MARK_CONTAINER_FOR_KILLABLE,
 
   // Cancel a killable container
-  MARK_CONTAINER_FOR_NONKILLABLE
+  MARK_CONTAINER_FOR_NONKILLABLE,
+
+  //Queue Management Change
+  MANAGE_QUEUE
 }
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java
new file mode 100644
index 00000000000..9874f02310c
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoCreatedQueueBase.java
@@ -0,0 +1,629 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
+
+import org.apache.commons.lang.math.RandomUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.Event;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.placement
+    .ApplicationPlacementContext;
+import org.apache.hadoop.yarn.server.resourcemanager.placement
+    .UserGroupMappingPlacementRule;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler
+    .ResourceScheduler;
+
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler
+    .SchedulerDynamicEditException;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
+    .queuemanagement.GuaranteedOrZeroCapacityOverTimePolicy;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common
+    .QueueEntitlement;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
+    .AppAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
+    .AppAttemptAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event
+    .SchedulerEvent;
+import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager
+    .NO_LABEL;
+import static org.apache.hadoop.yarn.server.resourcemanager.scheduler
+    .capacity.CSQueueUtils.EPSILON;
+import static org.apache.hadoop.yarn.server.resourcemanager.scheduler
+    .capacity.CapacitySchedulerConfiguration.DOT;
+import static org.apache.hadoop.yarn.server.resourcemanager.scheduler
+    .capacity.CapacitySchedulerConfiguration.FAIR_APP_ORDERING_POLICY;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Base class for auto created leaf queue tests; sets up a MockRM running
+ * the CapacityScheduler with C and D as auto creation enabled parent queues.
+ */
+public class TestCapacitySchedulerAutoCreatedQueueBase {
+
+  private static final Log LOG = LogFactory.getLog(
+      TestCapacitySchedulerAutoCreatedQueueBase.class);
+  protected final int GB = 1024;
+  protected final static ContainerUpdates NULL_UPDATE_REQUESTS =
+      new ContainerUpdates();
+
+  protected static final String A = CapacitySchedulerConfiguration.ROOT + ".a";
+  protected static final String B = CapacitySchedulerConfiguration.ROOT + ".b";
+  protected static final String C = CapacitySchedulerConfiguration.ROOT + ".c";
+  protected static final String D = CapacitySchedulerConfiguration.ROOT + ".d";
+  protected static final String A1 = A + ".a1";
+  protected static final String A2 = A + ".a2";
+  protected static final String B1 = B + ".b1";
+  protected static final String B2 = B + ".b2";
+  protected static final String B3 = B + ".b3";
+  protected static final String C1 = C + ".c1";
+  protected static final String C2 = C + ".c2";
+  protected static final String C3 = C + ".c3";
+  protected static float A_CAPACITY = 20f;
+  protected static float B_CAPACITY = 40f;
+  protected static float C_CAPACITY = 20f;
+  protected static float D_CAPACITY = 20f;
+  protected static float A1_CAPACITY = 30;
+  protected static float A2_CAPACITY = 70;
+  protected static float B1_CAPACITY = 60f;
+  protected static float B2_CAPACITY = 20f;
+  protected static float B3_CAPACITY = 20f;
+  protected static float C1_CAPACITY = 20f;
+  protected static float C2_CAPACITY = 20f;
+
+  protected final String USER = "user_";
+  protected final String USER0 = USER + 0;
+  protected final String USER1 = USER + 1;
+  protected final String USER3 = USER + 3;
+  protected final String USER2 = USER + 2;
+  protected final String PARENT_QUEUE = "c";
+
+  protected final Set<String> accessibleNodeLabelsOnC = new HashSet<>();
+
+  protected final String NODEL_LABEL_GPU = "GPU";
+  protected final String NODEL_LABEL_SSD = "SSD";
+
+  protected MockRM mockRM = null;
+
+  protected CapacityScheduler cs;
+
+  private final TestCapacityScheduler tcs = new TestCapacityScheduler();
+
+  protected static SpyDispatcher dispatcher;
+
+  protected static EventHandler<Event> rmAppEventEventHandler;
+
+  protected static class SpyDispatcher extends AsyncDispatcher {
+
+    protected static BlockingQueue<Event> eventQueue =
+        new LinkedBlockingQueue<>();
+
+    protected static class SpyRMAppEventHandler implements EventHandler<Event> {
+      @Override
+      public void handle(Event event) {
+        eventQueue.add(event);
+      }
+    }
+
+    @Override
+    protected void dispatch(Event event) {
+      eventQueue.add(event);
+    }
+
+    @Override
+    public EventHandler<Event> getEventHandler() {
+      return rmAppEventEventHandler;
+    }
+
+    /**
+     * Waits up to {@code timeout} milliseconds for the next spied event and
+     * asserts it has the same type and class as {@code expectedEvent}.
+     */
+    void spyOnNextEvent(Event expectedEvent, long timeout)
+        throws InterruptedException {
+
+      Event event = eventQueue.poll(timeout, TimeUnit.MILLISECONDS);
+      // poll() returns null on timeout; fail clearly instead of with an NPE
+      assertNotNull("No event received within " + timeout + " ms", event);
+      assertEquals(expectedEvent.getType(), event.getType());
+      assertEquals(expectedEvent.getClass(), event.getClass());
+    }
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
+    setupQueueConfiguration(conf);
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+        ResourceScheduler.class);
+
+    List<String> queuePlacementRules = new ArrayList<>();
+    queuePlacementRules.add(YarnConfiguration.USER_GROUP_PLACEMENT_RULE);
+    conf.setQueuePlacementRules(queuePlacementRules);
+
+    setupQueueMappings(conf);
+
+    mockRM = new MockRM(conf);
+    cs = (CapacityScheduler) mockRM.getResourceScheduler();
+
+    dispatcher = new SpyDispatcher();
+    rmAppEventEventHandler = new SpyDispatcher.SpyRMAppEventHandler();
+    dispatcher.register(RMAppEventType.class, rmAppEventEventHandler);
+    cs.updatePlacementRules();
+    mockRM.start();
+
+    cs.start();
+  }
+
+  /**
+   * Maps users user_0 .. user_3 to the leaf queues c.user_0 .. c.user_3 and
+   * enables override with queue mappings.
+   */
+  protected CapacitySchedulerConfiguration setupQueueMappings(
+      CapacitySchedulerConfiguration conf) {
+
+    //set queue mapping
+    List<UserGroupMappingPlacementRule.QueueMapping> queueMappings =
+        new ArrayList<>();
+    for (int i = 0; i <= 3; i++) {
+      //Set C as parent queue name for auto queue creation
+      UserGroupMappingPlacementRule.QueueMapping userQueueMapping =
+          new UserGroupMappingPlacementRule.QueueMapping(
+              UserGroupMappingPlacementRule.QueueMapping.MappingType.USER,
+              USER + i, getQueueMapping(PARENT_QUEUE, USER + i));
+      queueMappings.add(userQueueMapping);
+    }
+
+    conf.setQueueMappings(queueMappings);
+    //override with queue mappings
+    conf.setOverrideWithQueueMappings(true);
+    return conf;
+  }
+
+  /**
+   * Configures the queue hierarchy below, with C and D set up as auto
+   * creation enabled parent queues.
+   * <pre>
+   *            root
+   *        /   |   \   \
+   *       a    b    c   d
+   *      / \  /|\
+   *    a1 a2 b1 b2 b3
+   * </pre>
+   *
+   * @param conf configuration to be modified
+   * @return the modified CS configuration
+   */
+  protected CapacitySchedulerConfiguration setupQueueConfiguration(
+      CapacitySchedulerConfiguration conf) {
+
+    //setup new queues with one of them auto enabled
+    // Define top-level queues
+    // Set childQueue for root
+    conf.setQueues(CapacitySchedulerConfiguration.ROOT,
+        new String[] { "a", "b", "c", "d" });
+
+    conf.setCapacity(A, A_CAPACITY);
+    conf.setCapacity(B, B_CAPACITY);
+    conf.setCapacity(C, C_CAPACITY);
+    conf.setCapacity(D, D_CAPACITY);
+
+    // Define 2nd-level queues
+    conf.setQueues(A, new String[] { "a1", "a2" });
+    conf.setCapacity(A1, A1_CAPACITY);
+    conf.setUserLimitFactor(A1, 100.0f);
+    conf.setCapacity(A2, A2_CAPACITY);
+    conf.setUserLimitFactor(A2, 100.0f);
+
+    conf.setQueues(B, new String[] { "b1", "b2", "b3" });
+    conf.setCapacity(B1, B1_CAPACITY);
+    conf.setUserLimitFactor(B1, 100.0f);
+    conf.setCapacity(B2, B2_CAPACITY);
+    conf.setUserLimitFactor(B2, 100.0f);
+    conf.setCapacity(B3, B3_CAPACITY);
+    conf.setUserLimitFactor(B3, 100.0f);
+
+    conf.setUserLimitFactor(C, 1.0f);
+    conf.setAutoCreateChildQueueEnabled(C, true);
+
+    //Setup leaf queue template configs
+    conf.setAutoCreatedLeafQueueConfigCapacity(C, 50.0f);
+    conf.setAutoCreatedLeafQueueConfigMaxCapacity(C, 100.0f);
+    conf.setAutoCreatedLeafQueueConfigUserLimit(C, 100);
+    conf.setAutoCreatedLeafQueueConfigUserLimitFactor(C, 3.0f);
+
+    LOG.info("Setup " + C + " as an auto leaf creation enabled parent queue");
+
+    conf.setUserLimitFactor(D, 1.0f);
+    conf.setAutoCreateChildQueueEnabled(D, true);
+    conf.setUserLimit(D, 100);
+    conf.setUserLimitFactor(D, 3.0f);
+
+    //Setup leaf queue template configs
+    conf.setAutoCreatedLeafQueueConfigCapacity(D, 10.0f);
+    conf.setAutoCreatedLeafQueueConfigMaxCapacity(D, 100.0f);
+    conf.setAutoCreatedLeafQueueConfigUserLimit(D, 3);
+    conf.setAutoCreatedLeafQueueConfigUserLimitFactor(D, 100);
+
+    conf.set(CapacitySchedulerConfiguration.PREFIX + C + DOT
+            + CapacitySchedulerConfiguration
+            .AUTO_CREATED_LEAF_QUEUE_TEMPLATE_PREFIX
+            + DOT + CapacitySchedulerConfiguration.ORDERING_POLICY,
+        FAIR_APP_ORDERING_POLICY);
+
+    accessibleNodeLabelsOnC.add(NODEL_LABEL_GPU);
+    accessibleNodeLabelsOnC.add(NODEL_LABEL_SSD);
+    accessibleNodeLabelsOnC.add(NO_LABEL);
+
+    conf.setAccessibleNodeLabels(C, accessibleNodeLabelsOnC);
+    conf.setCapacityByLabel(C, NODEL_LABEL_GPU, 50);
+    conf.setCapacityByLabel(C, NODEL_LABEL_SSD, 50);
+
+    LOG.info("Setup " + D + " as an auto leaf creation enabled parent queue");
+
+    return conf;
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (mockRM != null) {
+      mockRM.stop();
+    }
+  }
+
+  protected void validateCapacities(AutoCreatedLeafQueue autoCreatedLeafQueue,
+      float capacity, float absCapacity, float maxCapacity,
+      float absMaxCapacity) {
+    assertEquals(capacity, autoCreatedLeafQueue.getCapacity(), EPSILON);
+    assertEquals(absCapacity, autoCreatedLeafQueue.getAbsoluteCapacity(),
+        EPSILON);
+    assertEquals(maxCapacity, autoCreatedLeafQueue.getMaximumCapacity(),
+        EPSILON);
+    assertEquals(absMaxCapacity,
+        autoCreatedLeafQueue.getAbsoluteMaximumCapacity(), EPSILON);
+  }
+
+  /**
+   * Zeroes the entitlement of the named auto created leaf queue and removes
+   * it from its managed parent queue and the queue manager, if it exists.
+   */
+  protected void cleanupQueue(String queueName) throws YarnException {
+    AutoCreatedLeafQueue queue = (AutoCreatedLeafQueue) cs.getQueue(queueName);
+    if (queue != null) {
+      setEntitlement(queue, new QueueEntitlement(0.0f, 0.0f));
+      ((ManagedParentQueue) queue.getParent()).removeChildQueue(
+          queue.getQueueName());
+      cs.getCapacitySchedulerQueueManager().removeQueue(queue.getQueueName());
+    }
+  }
+
+  protected ApplicationId submitApp(MockRM rm, CSQueue parentQueue,
+      String leafQueueName, String user, int expectedNumAppsInParentQueue,
+      int expectedNumAppsInLeafQueue) throws Exception {
+
+    CapacityScheduler capacityScheduler =
+        (CapacityScheduler) rm.getResourceScheduler();
+    // submit an app
+    RMApp rmApp = rm.submitApp(GB, "test-auto-queue-activation", user, null,
+        leafQueueName);
+
+    // check preconditions
+    List<ApplicationAttemptId> appsInParentQueue =
+        capacityScheduler.getAppsInQueue(parentQueue.getQueueName());
+    assertEquals(expectedNumAppsInParentQueue, appsInParentQueue.size());
+
+    List<ApplicationAttemptId> appsInLeafQueue =
+        capacityScheduler.getAppsInQueue(leafQueueName);
+    assertEquals(expectedNumAppsInLeafQueue, appsInLeafQueue.size());
+
+    return rmApp.getApplicationId();
+  }
+
+  protected List<UserGroupMappingPlacementRule.QueueMapping> setupQueueMapping(
+      CapacityScheduler newCS, String user, String parentQueue, String queue) {
+    List<UserGroupMappingPlacementRule.QueueMapping> queueMappings =
+        new ArrayList<>();
+    queueMappings.add(new UserGroupMappingPlacementRule.QueueMapping(
+        UserGroupMappingPlacementRule.QueueMapping.MappingType.USER, user,
+        getQueueMapping(parentQueue, queue)));
+    newCS.getConfiguration().setQueueMappings(queueMappings);
+    return queueMappings;
+  }
+
+  protected MockRM setupSchedulerInstance() {
+    CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
+    setupQueueConfiguration(conf);
+    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
+        ResourceScheduler.class);
+
+    List<String> queuePlacementRules = new ArrayList<>();
+    queuePlacementRules.add(YarnConfiguration.USER_GROUP_PLACEMENT_RULE);
+    conf.setQueuePlacementRules(queuePlacementRules);
+
+    setupQueueMappings(conf);
+
+    MockRM newMockRM = new MockRM(conf);
+    newMockRM.start();
+    ((CapacityScheduler) newMockRM.getResourceScheduler()).start();
+    return newMockRM;
+  }
+
+  protected void checkQueueCapacities(CapacityScheduler newCS, float capacityC,
+      float capacityD) {
+    CSQueue rootQueue = newCS.getRootQueue();
+    CSQueue queueC = tcs.findQueue(rootQueue, C);
+    CSQueue queueD = tcs.findQueue(rootQueue, D);
+    CSQueue queueC1 = tcs.findQueue(queueC, C1);
+    CSQueue queueC2 = tcs.findQueue(queueC, C2);
+    CSQueue queueC3 = tcs.findQueue(queueC, C3);
+
+    float capC = capacityC / 100.0f;
+    float capD = capacityD / 100.0f;
+
+    tcs.checkQueueCapacity(queueC, capC, capC, 1.0f, 1.0f);
+    tcs.checkQueueCapacity(queueD, capD, capD, 1.0f, 1.0f);
+    tcs.checkQueueCapacity(queueC1, C1_CAPACITY / 100.0f,
+        (C1_CAPACITY / 100.0f) * capC, 1.0f, 1.0f);
+    tcs.checkQueueCapacity(queueC2, C2_CAPACITY / 100.0f,
+        (C2_CAPACITY / 100.0f) * capC, 1.0f, 1.0f);
+
+    if (queueC3 != null) {
+      ManagedParentQueue parentQueue = (ManagedParentQueue) queueC;
+      QueueCapacities cap =
+          parentQueue.getLeafQueueTemplate().getQueueCapacities();
+      tcs.checkQueueCapacity(queueC3, cap.getCapacity(),
+          (cap.getCapacity()) * capC, 1.0f, 1.0f);
+    }
+  }
+
+  String getQueueMapping(String parentQueue, String leafQueue) {
+    return parentQueue + DOT + leafQueue;
+  }
+
+  protected ApplicationAttemptId submitApp(CapacityScheduler newCS, String user,
+      String queue, String parentQueue) {
+    ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
+    SchedulerEvent addAppEvent = new AppAddedSchedulerEvent(appId, queue, user,
+        new ApplicationPlacementContext(queue, parentQueue));
+    ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
+        appId, 1);
+    SchedulerEvent addAttemptEvent = new AppAttemptAddedSchedulerEvent(
+        appAttemptId, false);
+    newCS.handle(addAppEvent);
+    newCS.handle(addAttemptEvent);
+    return appAttemptId;
+  }
+
+  protected RMApp submitApp(String user, String queue, String nodeLabel)
+      throws Exception {
+    RMApp app = mockRM.submitApp(GB,
+        "test-auto-queue-creation" + RandomUtils.nextInt(100), user, null,
+        queue, nodeLabel);
+    Assert.assertEquals(nodeLabel, app.getAmNodeLabelExpression());
+    // check preconditions
+    List<ApplicationAttemptId> appsInC = cs.getAppsInQueue(PARENT_QUEUE);
+    assertEquals(1, appsInC.size());
+    assertNotNull(cs.getQueue(queue));
+
+    return app;
+  }
+
+  void setEntitlement(AutoCreatedLeafQueue queue,
+      QueueEntitlement entitlement) {
+    queue.setCapacity(entitlement.getCapacity());
+    queue.setAbsoluteCapacity(
+        queue.getParent().getAbsoluteCapacity() * entitlement.getCapacity());
+    // maxCapacity comes from the entitlement's own max capacity, not from
+    // its capacity
+    queue.setMaxCapacity(entitlement.getMaxCapacity());
+  }
+
+  protected void validateUserAndAppLimits(
+      AutoCreatedLeafQueue autoCreatedLeafQueue, int maxApps,
+      int maxAppsPerUser) {
+    assertEquals(maxApps, autoCreatedLeafQueue.getMaxApplications());
+    assertEquals(maxAppsPerUser,
+        autoCreatedLeafQueue.getMaxApplicationsPerUser());
+  }
+
+  protected void validateInitialQueueEntitlement(CSQueue parentQueue,
+      String leafQueueName, float expectedTotalChildQueueAbsCapacity)
+      throws SchedulerDynamicEditException {
+    validateInitialQueueEntitlement(cs, parentQueue, leafQueueName,
+        expectedTotalChildQueueAbsCapacity);
+  }
+
+  /**
+   * Asserts the policy's total activated child queue capacity, that the leaf
+   * queue's per-label capacities match the parent's leaf queue template, and
+   * that the leaf queue is active.
+   */
+  protected void validateInitialQueueEntitlement(
+      CapacityScheduler capacityScheduler, CSQueue parentQueue,
+      String leafQueueName, float expectedTotalChildQueueAbsCapacity)
+      throws SchedulerDynamicEditException {
+    ManagedParentQueue autoCreateEnabledParentQueue =
+        (ManagedParentQueue) parentQueue;
+
+    GuaranteedOrZeroCapacityOverTimePolicy policy =
+        (GuaranteedOrZeroCapacityOverTimePolicy) autoCreateEnabledParentQueue
+            .getAutoCreatedQueueManagementPolicy();
+
+    assertEquals(expectedTotalChildQueueAbsCapacity,
+        policy.getAbsoluteActivatedChildQueueCapacity(), EPSILON);
+
+    AutoCreatedLeafQueue leafQueue =
+        (AutoCreatedLeafQueue) capacityScheduler.getQueue(leafQueueName);
+
+    for (String label : accessibleNodeLabelsOnC) {
+      validateCapacitiesByLabel(autoCreateEnabledParentQueue, leafQueue, label);
+    }
+
+    assertEquals(true, policy.isActive(leafQueue));
+  }
+
+  protected void validateCapacitiesByLabel(
+      ManagedParentQueue autoCreateEnabledParentQueue,
+      AutoCreatedLeafQueue leafQueue, String label) {
+    assertEquals(
+        autoCreateEnabledParentQueue.getLeafQueueTemplate().getQueueCapacities()
+            .getCapacity(), leafQueue.getQueueCapacities().getCapacity(label),
+        EPSILON);
+    assertEquals(
+        autoCreateEnabledParentQueue.getLeafQueueTemplate().getQueueCapacities()
+            .getMaximumCapacity(),
+        leafQueue.getQueueCapacities().getMaximumCapacity(label), EPSILON);
+  }
+
+  /**
+   * Asserts that the changes entitle the leaf queue to the parent's leaf
+   * queue template capacities, and that the policy reports the expected
+   * activated capacity and an active leaf queue.
+   */
+  protected void validateActivatedQueueEntitlement(CSQueue parentQueue,
+      String leafQueueName, float expectedTotalChildQueueAbsCapacity,
+      List<QueueManagementChange> queueManagementChanges)
+      throws SchedulerDynamicEditException {
+    ManagedParentQueue autoCreateEnabledParentQueue =
+        (ManagedParentQueue) parentQueue;
+
+    GuaranteedOrZeroCapacityOverTimePolicy policy =
+        (GuaranteedOrZeroCapacityOverTimePolicy) autoCreateEnabledParentQueue
+            .getAutoCreatedQueueManagementPolicy();
+
+    QueueCapacities cap = autoCreateEnabledParentQueue.getLeafQueueTemplate()
+        .getQueueCapacities();
+    QueueEntitlement expectedEntitlement = new QueueEntitlement(
+        cap.getCapacity(), cap.getMaximumCapacity());
+
+    //validate capacity
+    validateQueueEntitlements(leafQueueName, expectedEntitlement,
+        queueManagementChanges);
+
+    //validate parent queue state
+    assertEquals(expectedTotalChildQueueAbsCapacity,
+        policy.getAbsoluteActivatedChildQueueCapacity(), EPSILON);
+
+    AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) cs.getQueue(
+        leafQueueName);
+
+    //validate leaf queue state
+    assertEquals(true, policy.isActive(leafQueue));
+  }
+
+  /**
+   * Asserts that the changes entitle the leaf queue to zero capacity (with
+   * maximum capacity 1.0), and that the policy reports the expected
+   * activated capacity and an inactive leaf queue.
+   */
+  protected void validateDeactivatedQueueEntitlement(CSQueue parentQueue,
+      String leafQueueName, float expectedTotalChildQueueAbsCapacity,
+      List<QueueManagementChange> queueManagementChanges)
+      throws SchedulerDynamicEditException {
+    QueueEntitlement expectedEntitlement = new QueueEntitlement(0.0f, 1.0f);
+
+    ManagedParentQueue autoCreateEnabledParentQueue =
+        (ManagedParentQueue) parentQueue;
+
+    AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) cs.getQueue(
+        leafQueueName);
+
+    GuaranteedOrZeroCapacityOverTimePolicy policy =
+        (GuaranteedOrZeroCapacityOverTimePolicy) autoCreateEnabledParentQueue
+            .getAutoCreatedQueueManagementPolicy();
+
+    //validate parent queue state
+    assertEquals(expectedTotalChildQueueAbsCapacity,
+        policy.getAbsoluteActivatedChildQueueCapacity(), EPSILON);
+
+    //validate leaf queue state
+    assertEquals(false, policy.isActive(leafQueue));
+
+    //validate capacity
+    validateQueueEntitlements(leafQueueName, expectedEntitlement,
+        queueManagementChanges);
+  }
+
+  private void validateQueueEntitlements(String leafQueueName,
+      QueueEntitlement expectedEntitlement,
+      List<QueueManagementChange> queueEntitlementChanges) {
+    AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) cs.getQueue(
+        leafQueueName);
+    validateQueueEntitlementChangesForLeafQueue(leafQueue, expectedEntitlement,
+        queueEntitlementChanges);
+  }
+
+  /**
+   * Asserts that the changes contain an entry for the leaf queue and that,
+   * for each label in accessibleNodeLabelsOnC, its updated template matches
+   * the expected entitlement; fails if the leaf queue has no entry.
+   */
+  private void validateQueueEntitlementChangesForLeafQueue(CSQueue leafQueue,
+      QueueEntitlement expectedQueueEntitlement,
+      final List<QueueManagementChange> queueEntitlementChanges) {
+    boolean found = false;
+    for (QueueManagementChange entitlementChange : queueEntitlementChanges) {
+      if (leafQueue.getQueueName().equals(
+          entitlementChange.getQueue().getQueueName())) {
+
+        AutoCreatedLeafQueueConfig updatedQueueTemplate =
+            entitlementChange.getUpdatedQueueTemplate();
+
+        for (String label : accessibleNodeLabelsOnC) {
+          QueueEntitlement newEntitlement = new QueueEntitlement(
+              updatedQueueTemplate.getQueueCapacities().getCapacity(label),
+              updatedQueueTemplate.getQueueCapacities()
+                  .getMaximumCapacity(label));
+          assertEquals(expectedQueueEntitlement, newEntitlement);
+        }
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      fail("Could not find the specified leaf queue in entitlement changes : "
+          + leafQueue.getQueueName());
+    }
+  }
+
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoQueueCreation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoQueueCreation.java
index 7090bc929f8..049a93246fb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoQueueCreation.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerAutoQueueCreation.java @@ -26,51 +26,54 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.Event; -import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; -import org.apache.hadoop.yarn.server.resourcemanager.placement.ApplicationPlacementContext; -import org.apache.hadoop.yarn.server.resourcemanager.placement.UserGroupMappingPlacementRule; +import org.apache.hadoop.yarn.server.resourcemanager.placement + .ApplicationPlacementContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt + .RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; -import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; -import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .SchedulerDynamicEditException; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .queuemanagement.GuaranteedOrZeroCapacityOverTimePolicy; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common + .QueueEntitlement; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .NodeAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .SchedulerEvent; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy + .FairOrderingPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.security + .ClientToAMTokenSecretManagerInRM; +import org.apache.hadoop.yarn.server.resourcemanager.security + .NMTokenSecretManagerInRM; +import org.apache.hadoop.yarn.server.resourcemanager.security + .RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; -import org.junit.After; -import org.junit.Before; import org.junit.Test; import java.io.IOException; -import 
java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.TimeUnit; -import static org.apache.hadoop.yarn.server.resourcemanager.placement.UserGroupMappingPlacementRule.CURRENT_USER_MAPPING; -import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueueUtils.EPSILON; -import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.DOT; +import static org.apache.hadoop.yarn.server.resourcemanager.placement + .UserGroupMappingPlacementRule.CURRENT_USER_MAPPING; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler + .capacity.CSQueueUtils.EPSILON; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -79,198 +82,14 @@ import static org.mockito.Mockito.when; /** - * Tests for creation and reinitilization of auto created leaf queues + * Tests for creation and reinitialization of auto created leaf queues * under a ManagedParentQueue. 
*/ -public class TestCapacitySchedulerAutoQueueCreation { - - private static final Log LOG = LogFactory.getLog(TestCapacityScheduler.class); - private final int GB = 1024; - private final static ContainerUpdates NULL_UPDATE_REQUESTS = - new ContainerUpdates(); - - private static final String A = CapacitySchedulerConfiguration.ROOT + ".a"; - private static final String B = CapacitySchedulerConfiguration.ROOT + ".b"; - private static final String C = CapacitySchedulerConfiguration.ROOT + ".c"; - private static final String D = CapacitySchedulerConfiguration.ROOT + ".d"; - private static final String A1 = A + ".a1"; - private static final String A2 = A + ".a2"; - private static final String B1 = B + ".b1"; - private static final String B2 = B + ".b2"; - private static final String B3 = B + ".b3"; - private static final String C1 = C + ".c1"; - private static final String C2 = C + ".c2"; - private static final String C3 = C + ".c3"; - private static float A_CAPACITY = 20f; - private static float B_CAPACITY = 40f; - private static float C_CAPACITY = 20f; - private static float D_CAPACITY = 20f; - private static float A1_CAPACITY = 30; - private static float A2_CAPACITY = 70; - private static float B1_CAPACITY = 60f; - private static float B2_CAPACITY = 20f; - private static float B3_CAPACITY = 20f; - private static float C1_CAPACITY = 20f; - private static float C2_CAPACITY = 20f; - - private static String USER = "user_"; - private static String USER0 = USER + 0; - private static String USER2 = USER + 2; - private static String PARENT_QUEUE = "c"; - - private MockRM mockRM = null; - - private CapacityScheduler cs; - - private final TestCapacityScheduler tcs = new TestCapacityScheduler(); - - private static SpyDispatcher dispatcher; - - private static EventHandler rmAppEventEventHandler; - - private static class SpyDispatcher extends AsyncDispatcher { - - private static BlockingQueue eventQueue = - new LinkedBlockingQueue<>(); - - private static class 
SpyRMAppEventHandler implements EventHandler { - public void handle(Event event) { - eventQueue.add(event); - } - } - - @Override - protected void dispatch(Event event) { - eventQueue.add(event); - } - - @Override - public EventHandler getEventHandler() { - return rmAppEventEventHandler; - } - - void spyOnNextEvent(Event expectedEvent, long timeout) - throws InterruptedException { - - Event event = eventQueue.poll(timeout, TimeUnit.MILLISECONDS); - assertEquals(expectedEvent.getType(), event.getType()); - assertEquals(expectedEvent.getClass(), event.getClass()); - } - } - - @Before - public void setUp() throws Exception { - CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(); - setupQueueConfiguration(conf); - conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, - ResourceScheduler.class); - - List queuePlacementRules = new ArrayList<>(); - queuePlacementRules.add(YarnConfiguration.USER_GROUP_PLACEMENT_RULE); - conf.setQueuePlacementRules(queuePlacementRules); - - setupQueueMappings(conf); - - mockRM = new MockRM(conf); - cs = (CapacityScheduler) mockRM.getResourceScheduler(); - - dispatcher = new SpyDispatcher(); - rmAppEventEventHandler = new SpyDispatcher.SpyRMAppEventHandler(); - dispatcher.register(RMAppEventType.class, rmAppEventEventHandler); - cs.updatePlacementRules(); - mockRM.start(); - - cs.start(); - } - - private CapacitySchedulerConfiguration setupQueueMappings( - CapacitySchedulerConfiguration conf) { - - //set queue mapping - List queueMappings = - new ArrayList<>(); - for (int i = 0; i <= 3; i++) { - //Set C as parent queue name for auto queue creation - UserGroupMappingPlacementRule.QueueMapping userQueueMapping = - new UserGroupMappingPlacementRule.QueueMapping( - UserGroupMappingPlacementRule.QueueMapping.MappingType.USER, - USER + i, getQueueMapping(PARENT_QUEUE, USER + i)); - queueMappings.add(userQueueMapping); - } +public class TestCapacitySchedulerAutoQueueCreation + extends 
TestCapacitySchedulerAutoCreatedQueueBase { - conf.setQueueMappings(queueMappings); - //override with queue mappings - conf.setOverrideWithQueueMappings(true); - return conf; - } - - /** - * @param conf, to be modified - * @return, CS configuration which has C - * as an auto creation enabled parent queue - *

- * root - * / \ \ \ - * a b c d - * / \ / | \ - * a1 a2 b1 b2 b3 - */ - private CapacitySchedulerConfiguration setupQueueConfiguration( - CapacitySchedulerConfiguration conf) { - - //setup new queues with one of them auto enabled - // Define top-level queues - // Set childQueue for root - conf.setQueues(CapacitySchedulerConfiguration.ROOT, - new String[] { "a", "b", "c", "d" }); - - conf.setCapacity(A, A_CAPACITY); - conf.setCapacity(B, B_CAPACITY); - conf.setCapacity(C, C_CAPACITY); - conf.setCapacity(D, D_CAPACITY); - - // Define 2nd-level queues - conf.setQueues(A, new String[] { "a1", "a2" }); - conf.setCapacity(A1, A1_CAPACITY); - conf.setUserLimitFactor(A1, 100.0f); - conf.setCapacity(A2, A2_CAPACITY); - conf.setUserLimitFactor(A2, 100.0f); - - conf.setQueues(B, new String[] { "b1", "b2", "b3" }); - conf.setCapacity(B1, B1_CAPACITY); - conf.setUserLimitFactor(B1, 100.0f); - conf.setCapacity(B2, B2_CAPACITY); - conf.setUserLimitFactor(B2, 100.0f); - conf.setCapacity(B3, B3_CAPACITY); - conf.setUserLimitFactor(B3, 100.0f); - - conf.setUserLimitFactor(C, 1.0f); - conf.setAutoCreateChildQueueEnabled(C, true); - - //Setup leaf queue template configs - conf.setAutoCreatedLeafQueueTemplateCapacity(C, 50.0f); - conf.setAutoCreatedLeafQueueTemplateMaxCapacity(C, 100.0f); - - LOG.info("Setup " + C + " as an auto leaf creation enabled parent queue"); - - conf.setUserLimitFactor(D, 1.0f); - conf.setAutoCreateChildQueueEnabled(D, true); - - //Setup leaf queue template configs - conf.setAutoCreatedLeafQueueTemplateCapacity(D, 10.0f); - conf.setAutoCreatedLeafQueueTemplateMaxCapacity(D, 100.0f); - - LOG.info("Setup " + D + " as an auto leaf creation enabled parent queue"); - - return conf; - } - - @After - public void tearDown() throws Exception { - if (mockRM != null) { - mockRM.stop(); - } - } + private static final Log LOG = LogFactory.getLog( + TestCapacitySchedulerAutoQueueCreation.class); @Test(timeout = 10000) public void testAutoCreateLeafQueueCreation() throws 
Exception { @@ -289,7 +108,11 @@ public void testAutoCreateLeafQueueCreation() throws Exception { ManagedParentQueue parentQueue = (ManagedParentQueue) cs.getQueue( PARENT_QUEUE); assertEquals(parentQueue, autoCreatedLeafQueue.getParent()); - validateCapacities(autoCreatedLeafQueue); + validateInitialQueueEntitlement(parentQueue, USER0, 0.1f); + validateUserAndAppLimits(autoCreatedLeafQueue, 1000, 1000); + + assertTrue(autoCreatedLeafQueue + .getOrderingPolicy() instanceof FairOrderingPolicy); } finally { cleanupQueue(USER0); } @@ -297,7 +120,6 @@ public void testAutoCreateLeafQueueCreation() throws Exception { @Test public void testReinitializeStoppedAutoCreatedLeafQueue() throws Exception { - try { String host = "127.0.0.1"; RMNode node = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, @@ -306,20 +128,28 @@ public void testReinitializeStoppedAutoCreatedLeafQueue() throws Exception { // submit an app - RMApp app = mockRM.submitApp(GB, "test-auto-queue-creation-1", USER0, + RMApp app1 = mockRM.submitApp(GB, "test-auto-queue-creation-1", USER0, null, USER0); + + RMApp app2 = mockRM.submitApp(GB, "test-auto-queue-creation-2", USER1, + null, USER1); // check preconditions List appsInC = cs.getAppsInQueue(PARENT_QUEUE); - assertEquals(1, appsInC.size()); + assertEquals(2, appsInC.size()); assertNotNull(cs.getQueue(USER0)); + assertNotNull(cs.getQueue(USER1)); - AutoCreatedLeafQueue autoCreatedLeafQueue = - (AutoCreatedLeafQueue) cs.getQueue(USER0); + AutoCreatedLeafQueue user0Queue = (AutoCreatedLeafQueue) cs.getQueue( + USER0); + AutoCreatedLeafQueue user1Queue = (AutoCreatedLeafQueue) cs.getQueue( + USER0); ManagedParentQueue parentQueue = (ManagedParentQueue) cs.getQueue( PARENT_QUEUE); - assertEquals(parentQueue, autoCreatedLeafQueue.getParent()); - validateCapacities(autoCreatedLeafQueue); + assertEquals(parentQueue, user0Queue.getParent()); + assertEquals(parentQueue, user1Queue.getParent()); + validateInitialQueueEntitlement(parentQueue, USER0, 0.2f); 
+ validateInitialQueueEntitlement(parentQueue, USER1, 0.2f); ApplicationAttemptId appAttemptId = appsInC.get(0); @@ -337,7 +167,7 @@ public void testReinitializeStoppedAutoCreatedLeafQueue() throws Exception { CapacityScheduler.schedule(cs); //change state to draining - autoCreatedLeafQueue.stopQueue(); + user0Queue.stopQueue(); cs.killAllAppsInQueue(USER0); @@ -346,80 +176,24 @@ public void testReinitializeStoppedAutoCreatedLeafQueue() throws Exception { mockRM.waitForState(appAttemptId.getApplicationId(), RMAppState.KILLED); //change state to stopped - autoCreatedLeafQueue.stopQueue(); + user0Queue.stopQueue(); assertEquals(QueueState.STOPPED, - autoCreatedLeafQueue.getQueueInfo().getQueueState()); + user0Queue.getQueueInfo().getQueueState()); cs.reinitialize(cs.getConf(), mockRM.getRMContext()); - AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) cs.getQueue( - USER0); - validateCapacities(leafQueue); - - } finally { - cleanupQueue(USER0); - } - } - - @Test - public void testRefreshQueuesWithAutoCreatedLeafQueues() throws Exception { - - MockRM newMockRM = setupSchedulerInstance(); - try { - CapacityScheduler newCS = - (CapacityScheduler) newMockRM.getResourceScheduler(); - CapacitySchedulerConfiguration conf = newCS.getConfiguration(); - - // Test add one auto created queue dynamically and manually modify - // capacity - ManagedParentQueue parentQueue = (ManagedParentQueue) newCS.getQueue("c"); - AutoCreatedLeafQueue c1 = new AutoCreatedLeafQueue(newCS, "c1", - parentQueue); - newCS.addQueue(c1); - c1.setEntitlement(new QueueEntitlement(C1_CAPACITY / 100, 1f)); - - // Test add another auto created queue and use setEntitlement to modify - // capacity - AutoCreatedLeafQueue c2 = new AutoCreatedLeafQueue(newCS, "c2", - (ManagedParentQueue) newCS.getQueue("c")); - newCS.addQueue(c2); - newCS.setEntitlement("c2", new QueueEntitlement(C2_CAPACITY / 100, 1f)); - - // Verify all allocations match - checkQueueCapacities(newCS, C_CAPACITY, D_CAPACITY); - - // 
Reinitialize and verify all dynamic queued survived - - conf.setCapacity(A, 20f); - conf.setCapacity(B, 20f); - conf.setCapacity(C, 40f); - conf.setCapacity(D, 20f); - newCS.reinitialize(conf, newMockRM.getRMContext()); - - checkQueueCapacities(newCS, 40f, 20f); - - //chnage parent template configs and reinitialize - conf.setAutoCreatedLeafQueueTemplateCapacity(C, 30.0f); - conf.setAutoCreatedLeafQueueTemplateMaxCapacity(C, 100.0f); - newCS.reinitialize(conf, newMockRM.getRMContext()); + AutoCreatedLeafQueue user0QueueReinited = + (AutoCreatedLeafQueue) cs.getQueue(USER0); - ManagedParentQueue c = (ManagedParentQueue) newCS.getQueue("c"); - AutoCreatedLeafQueue c3 = new AutoCreatedLeafQueue(newCS, "c3", c); - newCS.addQueue(c3); + validateCapacities(user0QueueReinited, 0.0f, 0.0f, 1.0f, 1.0f); - AbstractManagedParentQueue.AutoCreatedLeafQueueTemplate - leafQueueTemplate = parentQueue.getLeafQueueTemplate(); - QueueCapacities cap = leafQueueTemplate.getQueueCapacities(); - c3.setEntitlement( - new QueueEntitlement(cap.getCapacity(), cap.getMaximumCapacity())); - newCS.reinitialize(conf, newMockRM.getRMContext()); + AutoCreatedLeafQueue leafQueue = (AutoCreatedLeafQueue) cs.getQueue( + USER1); + validateInitialQueueEntitlement(parentQueue, leafQueue.getQueueName(), + 0.1f); - checkQueueCapacities(newCS, 40f, 20f); } finally { - if (newMockRM != null) { - ((CapacityScheduler) newMockRM.getResourceScheduler()).stop(); - newMockRM.stop(); - } + cleanupQueue(USER0); } } @@ -460,7 +234,7 @@ public void testConvertLeafQueueToParentQueueWithAutoCreate() CapacitySchedulerConfiguration newConf = new CapacitySchedulerConfiguration(); setupQueueConfiguration(newConf); - newConf.setAutoCreatedLeafQueueTemplateCapacity(A1, A1_CAPACITY / 10); + newConf.setAutoCreatedLeafQueueConfigCapacity(A1, A1_CAPACITY / 10); newConf.setAutoCreateChildQueueEnabled(A1, true); newCS.setConf(new YarnConfiguration()); @@ -490,7 +264,7 @@ public void 
testConvertFailsFromParentQueueToManagedParentQueue() CapacitySchedulerConfiguration newConf = new CapacitySchedulerConfiguration(); setupQueueConfiguration(newConf); - newConf.setAutoCreatedLeafQueueTemplateCapacity(A, A_CAPACITY / 10); + newConf.setAutoCreatedLeafQueueConfigCapacity(A, A_CAPACITY / 10); newConf.setAutoCreateChildQueueEnabled(A, true); newCS.setConf(new YarnConfiguration()); @@ -531,39 +305,6 @@ public void testAutoCreateLeafQueueFailsWithNoQueueMapping() assertEquals(RMAppState.FAILED, app.getState()); } - private void validateCapacities(AutoCreatedLeafQueue autoCreatedLeafQueue) { - assertEquals(autoCreatedLeafQueue.getCapacity(), 0.0f, EPSILON); - assertEquals(autoCreatedLeafQueue.getAbsoluteCapacity(), 0.0f, EPSILON); - assertEquals(autoCreatedLeafQueue.getMaximumCapacity(), 0.0f, EPSILON); - assertEquals(autoCreatedLeafQueue.getAbsoluteMaximumCapacity(), 0.0f, - EPSILON); - int maxAppsForAutoCreatedQueues = (int) ( - CapacitySchedulerConfiguration.DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS - * autoCreatedLeafQueue.getParent().getAbsoluteCapacity()); - assertEquals(autoCreatedLeafQueue.getMaxApplicationsPerUser(), - maxAppsForAutoCreatedQueues); - assertEquals(autoCreatedLeafQueue.getMaxApplicationsPerUser(), - (int) (maxAppsForAutoCreatedQueues * (cs.getConfiguration() - .getUserLimitFactor( - autoCreatedLeafQueue.getParent().getQueuePath())))); - } - - private void cleanupQueue(String queueName) throws YarnException { - AutoCreatedLeafQueue queue = (AutoCreatedLeafQueue) cs.getQueue(queueName); - if (queue != null) { - queue.setEntitlement(new QueueEntitlement(0.0f, 0.0f)); - ((ManagedParentQueue) queue.getParent()).removeChildQueue( - queue.getQueueName()); - cs.getCapacitySchedulerQueueManager().removeQueue(queue.getQueueName()); - } else{ - throw new YarnException("Queue does not exist " + queueName); - } - } - - String getQueueMapping(String parentQueue, String leafQueue) { - return parentQueue + DOT + leafQueue; - } - @Test(timeout = 10000) 
public void testQueueMappingValidationFailsWithInvalidParentQueueInMapping() throws Exception { @@ -586,8 +327,7 @@ public void testQueueMappingValidationFailsWithInvalidParentQueueInMapping() //expected exception assertTrue(e.getMessage().contains( "invalid parent queue which does not have auto creation of leaf " - + "queues enabled [" - + "a" + "]")); + + "queues enabled [" + "a" + "]")); } //"a" is not auto create enabled and app_user does not exist as a leaf @@ -650,9 +390,6 @@ public void testParentQueueUpdateInQueueMappingFailsAfterAutoCreation() (CapacityScheduler) newMockRM.getResourceScheduler(); try { - newMockRM.start(); - newCS.start(); - submitApp(newCS, USER0, USER0, PARENT_QUEUE); assertNotNull(newCS.getQueue(USER0)); @@ -700,12 +437,16 @@ public void testAutoCreationFailsWhenParentCapacityExceeded() AutoCreatedLeafQueue c1 = new AutoCreatedLeafQueue(newCS, "c1", parentQueue); newCS.addQueue(c1); - c1.setEntitlement(new QueueEntitlement(0.5f, 1f)); + c1.setCapacity(0.5f); + c1.setAbsoluteCapacity(c1.getParent().getAbsoluteCapacity() * 1f); + c1.setMaxCapacity(1f); + + setEntitlement(c1, new QueueEntitlement(0.5f, 1f)); AutoCreatedLeafQueue c2 = new AutoCreatedLeafQueue(newCS, "c2", parentQueue); newCS.addQueue(c2); - c2.setEntitlement(new QueueEntitlement(0.5f, 1f)); + setEntitlement(c2, new QueueEntitlement(0.5f, 1f)); try { AutoCreatedLeafQueue c3 = new AutoCreatedLeafQueue(newCS, "c3", @@ -723,72 +464,160 @@ public void testAutoCreationFailsWhenParentCapacityExceeded() } } - private List setupQueueMapping( - CapacityScheduler newCS, String user, String parentQueue, String queue) { - List queueMappings = - new ArrayList<>(); - queueMappings.add(new UserGroupMappingPlacementRule.QueueMapping( - UserGroupMappingPlacementRule.QueueMapping.MappingType.USER, user, - getQueueMapping(parentQueue, queue))); - newCS.getConfiguration().setQueueMappings(queueMappings); - return queueMappings; - } + @Test + public void 
testAutoCreatedQueueActivationDeactivation() throws Exception { + + try { + String host = "127.0.0.1"; + RMNode node = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, + host); + cs.handle(new NodeAddedSchedulerEvent(node)); + + CSQueue parentQueue = cs.getQueue(PARENT_QUEUE); + + //submit app1 as USER1 + submitApp(mockRM, parentQueue, USER1, USER1, 1, 1); + validateInitialQueueEntitlement(parentQueue, USER1, 0.1f); + + //submit another app2 as USER2 + ApplicationId user2AppId = submitApp(mockRM, parentQueue, USER2, USER2, 2, + 1); + validateInitialQueueEntitlement(parentQueue, USER2, 0.2f); + + //submit another app3 as USER1 + submitApp(mockRM, parentQueue, USER1, USER1, 3, 2); + + //validate total activated abs capacity remains the same + GuaranteedOrZeroCapacityOverTimePolicy autoCreatedQueueManagementPolicy = + (GuaranteedOrZeroCapacityOverTimePolicy) ((ManagedParentQueue) + parentQueue) + .getAutoCreatedQueueManagementPolicy(); + assertEquals(autoCreatedQueueManagementPolicy + .getAbsoluteActivatedChildQueueCapacity(), 0.2f, EPSILON); + + //submit user_3 app. 
This cant be scheduled since there is no capacity + submitApp(mockRM, parentQueue, USER3, USER3, 4, 1); + final CSQueue user3LeafQueue = cs.getQueue(USER3); + validateCapacities((AutoCreatedLeafQueue) user3LeafQueue, 0.0f, 0.0f, + 1.0f, 1.0f); - private MockRM setupSchedulerInstance() { - CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(); - setupQueueConfiguration(conf); - conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, - ResourceScheduler.class); + assertEquals(autoCreatedQueueManagementPolicy + .getAbsoluteActivatedChildQueueCapacity(), 0.2f, EPSILON); - List queuePlacementRules = new ArrayList(); - queuePlacementRules.add(YarnConfiguration.USER_GROUP_PLACEMENT_RULE); - conf.setQueuePlacementRules(queuePlacementRules); + //deactivate USER2 queue + cs.killAllAppsInQueue(USER2); + mockRM.waitForState(user2AppId, RMAppState.KILLED); - setupQueueMappings(conf); + //Verify if USER_2 can be deactivated since it has no pending appsA + List queueManagementChanges = + autoCreatedQueueManagementPolicy.computeQueueManagementChanges(); - MockRM newMockRM = new MockRM(conf); - return newMockRM; + ManagedParentQueue managedParentQueue = (ManagedParentQueue) parentQueue; + managedParentQueue.validateAndApplyQueueManagementChanges( + queueManagementChanges); + + validateDeactivatedQueueEntitlement(parentQueue, USER2, 0.2f, + queueManagementChanges); + + //USER_3 should now get activated + validateActivatedQueueEntitlement(parentQueue, USER3, 0.2f, + queueManagementChanges); + + } finally { + cleanupQueue(USER1); + cleanupQueue(USER2); + cleanupQueue(USER3); + } } - void checkQueueCapacities(CapacityScheduler newCS, float capacityC, - float capacityD) { - CSQueue rootQueue = newCS.getRootQueue(); - CSQueue queueC = tcs.findQueue(rootQueue, C); - CSQueue queueD = tcs.findQueue(rootQueue, D); - CSQueue queueC1 = tcs.findQueue(queueC, C1); - CSQueue queueC2 = tcs.findQueue(queueC, C2); - CSQueue queueC3 = tcs.findQueue(queueC, C3); - - 
float capC = capacityC / 100.0f; - float capD = capacityD / 100.0f; - - tcs.checkQueueCapacity(queueC, capC, capC, 1.0f, 1.0f); - tcs.checkQueueCapacity(queueD, capD, capD, 1.0f, 1.0f); - tcs.checkQueueCapacity(queueC1, C1_CAPACITY / 100.0f, - (C1_CAPACITY / 100.0f) * capC, 1.0f, 1.0f); - tcs.checkQueueCapacity(queueC2, C2_CAPACITY / 100.0f, - (C2_CAPACITY / 100.0f) * capC, 1.0f, 1.0f); - - if (queueC3 != null) { - ManagedParentQueue parentQueue = (ManagedParentQueue) queueC; - QueueCapacities cap = - parentQueue.getLeafQueueTemplate().getQueueCapacities(); - tcs.checkQueueCapacity(queueC3, cap.getCapacity(), - (cap.getCapacity()) * capC, 1.0f, 1.0f); + @Test + public void testAutoCreatedQueueInheritsNodeLabels() throws Exception { + + try { + String host = "127.0.0.1"; + RMNode node = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, + host); + cs.handle(new NodeAddedSchedulerEvent(node)); + + CSQueue parentQueue = cs.getQueue(PARENT_QUEUE); + + submitApp(USER1, USER1, NODEL_LABEL_GPU); + //submit app1 as USER1 + validateInitialQueueEntitlement(parentQueue, USER1, 0.1f); + } finally { + cleanupQueue(USER1); } } - ApplicationAttemptId submitApp(CapacityScheduler newCS, String user, - String queue, String parentQueue) { - ApplicationId appId = BuilderUtils.newApplicationId(1, 1); - SchedulerEvent addAppEvent = new AppAddedSchedulerEvent(appId, queue, user, - new ApplicationPlacementContext(queue, parentQueue)); - ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( - appId, 1); - SchedulerEvent addAttemptEvent = new AppAttemptAddedSchedulerEvent( - appAttemptId, false); - newCS.handle(addAppEvent); - newCS.handle(addAttemptEvent); - return appAttemptId; + @Test + public void testReinitializeQueuesWithAutoCreatedLeafQueues() + throws Exception { + + MockRM newMockRM = setupSchedulerInstance(); + try { + CapacityScheduler newCS = + (CapacityScheduler) newMockRM.getResourceScheduler(); + CapacitySchedulerConfiguration conf = 
newCS.getConfiguration(); + + String host = "127.0.0.1"; + RMNode node = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, + host); + newCS.handle(new NodeAddedSchedulerEvent(node)); + + CSQueue parentQueue = newCS.getQueue(PARENT_QUEUE); + + //submit app1 as USER1 + submitApp(newMockRM, parentQueue, USER1, USER1, 1, 1); + validateInitialQueueEntitlement(newCS, parentQueue, USER1, 0.1f); + + //submit another app2 as USER2 + ApplicationId user2AppId = submitApp(newMockRM, parentQueue, USER2, USER2, + 2, 1); + validateInitialQueueEntitlement(newCS, parentQueue, USER2, 0.2f); + + //update parent queue capacity + conf.setCapacity(C, 30f); + conf.setCapacity(D, 10f); + conf.setMaximumCapacity(C, 50f); + + newCS.reinitialize(conf, newMockRM.getRMContext()); + + // validate that leaf queues abs capacity is now changed + AutoCreatedLeafQueue user0Queue = (AutoCreatedLeafQueue) newCS.getQueue( + USER1); + validateCapacities(user0Queue, 0.5f, 0.15f, 1.0f, 0.5f); + validateUserAndAppLimits(user0Queue, 1500, 1500); + + //update leaf queue template capacities + conf.setAutoCreatedLeafQueueConfigCapacity(C, 30f); + conf.setAutoCreatedLeafQueueConfigMaxCapacity(C, 40f); + + newCS.reinitialize(conf, newMockRM.getRMContext()); + validateCapacities(user0Queue, 0.3f, 0.09f, 0.4f, 0.2f); + validateUserAndAppLimits(user0Queue, 900, 900); + + //submit app1 as USER3 + submitApp(newMockRM, parentQueue, USER3, USER3, 3, 1); + validateInitialQueueEntitlement(newCS, parentQueue, USER3, 0.27f); + AutoCreatedLeafQueue user3Queue = (AutoCreatedLeafQueue) newCS.getQueue( + USER1); + validateCapacities(user3Queue, 0.3f, 0.09f, 0.4f, 0.2f); + validateUserAndAppLimits(user3Queue, 900, 900); + + //submit app1 as USER1 - is already activated. 
there should be no diff + // in capacities + submitApp(newMockRM, parentQueue, USER3, USER3, 4, 2); + validateInitialQueueEntitlement(newCS, parentQueue, USER3, 0.27f); + validateCapacities(user3Queue, 0.3f, 0.09f, 0.4f, 0.2f); + validateUserAndAppLimits(user3Queue, 900, 900); + + } finally { + cleanupQueue(USER1); + cleanupQueue(USER2); + if (newMockRM != null) { + ((CapacityScheduler) newMockRM.getResourceScheduler()).stop(); + newMockRM.stop(); + } + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerDynamicBehavior.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerDynamicBehavior.java index 9425d5ea89b..9aba30c2e88 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerDynamicBehavior.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerDynamicBehavior.java @@ -77,21 +77,21 @@ public void testRefreshQueuesWithReservations() throws Exception { CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); //set default queue capacity to zero - ((AutoCreatedLeafQueue) cs + ((ReservationQueue) cs .getQueue("a" + ReservationConstants.DEFAULT_QUEUE_SUFFIX)) .setEntitlement( new QueueEntitlement(0f, 1f)); // Test add one reservation dynamically and manually modify capacity - AutoCreatedLeafQueue a1 = - new AutoCreatedLeafQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); + ReservationQueue a1 = + new ReservationQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); cs.addQueue(a1); 
a1.setEntitlement(new QueueEntitlement(A1_CAPACITY / 100, 1f)); // Test add another reservation queue and use setEntitlement to modify // capacity - AutoCreatedLeafQueue a2 = - new AutoCreatedLeafQueue(cs, "a2", (PlanQueue) cs.getQueue("a")); + ReservationQueue a2 = + new ReservationQueue(cs, "a2", (PlanQueue) cs.getQueue("a")); cs.addQueue(a2); cs.setEntitlement("a2", new QueueEntitlement(A2_CAPACITY / 100, 1.0f)); @@ -113,8 +113,8 @@ public void testAddQueueFailCases() throws Exception { try { // Test invalid addition (adding non-zero size queue) - AutoCreatedLeafQueue a1 = - new AutoCreatedLeafQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); + ReservationQueue a1 = + new ReservationQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); a1.setEntitlement(new QueueEntitlement(A1_CAPACITY / 100, 1f)); cs.addQueue(a1); fail(); @@ -123,11 +123,11 @@ public void testAddQueueFailCases() throws Exception { } // Test add one reservation dynamically and manually modify capacity - AutoCreatedLeafQueue a1 = - new AutoCreatedLeafQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); + ReservationQueue a1 = + new ReservationQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); cs.addQueue(a1); //set default queue capacity to zero - ((AutoCreatedLeafQueue) cs + ((ReservationQueue) cs .getQueue("a" + ReservationConstants.DEFAULT_QUEUE_SUFFIX)) .setEntitlement( new QueueEntitlement(0f, 1f)); @@ -135,8 +135,8 @@ public void testAddQueueFailCases() throws Exception { // Test add another reservation queue and use setEntitlement to modify // capacity - AutoCreatedLeafQueue a2 = - new AutoCreatedLeafQueue(cs, "a2", (PlanQueue) cs.getQueue("a")); + ReservationQueue a2 = + new ReservationQueue(cs, "a2", (PlanQueue) cs.getQueue("a")); cs.addQueue(a2); @@ -162,8 +162,8 @@ public void testRemoveQueue() throws Exception { CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); // Test add one reservation dynamically and manually modify capacity - AutoCreatedLeafQueue a1 = - new 
AutoCreatedLeafQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); + ReservationQueue a1 = + new ReservationQueue(cs, "a1", (PlanQueue) cs.getQueue("a")); cs.addQueue(a1); a1.setEntitlement(new QueueEntitlement(A1_CAPACITY / 100, 1f)); @@ -230,8 +230,8 @@ public void testMoveAppToPlanQueue() throws Exception { // create the default reservation queue String defQName = "a" + ReservationConstants.DEFAULT_QUEUE_SUFFIX; - AutoCreatedLeafQueue defQ = - new AutoCreatedLeafQueue(scheduler, defQName, + ReservationQueue defQ = + new ReservationQueue(scheduler, defQName, (PlanQueue) scheduler.getQueue("a")); scheduler.addQueue(defQ); defQ.setEntitlement(new QueueEntitlement(1f, 1f)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestGuaranteedOrZeroCapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestGuaranteedOrZeroCapacityOverTimePolicy.java new file mode 100644 index 00000000000..c71d2bf9134 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestGuaranteedOrZeroCapacityOverTimePolicy.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .queuemanagement.GuaranteedOrZeroCapacityOverTimePolicy; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestGuaranteedOrZeroCapacityOverTimePolicy { + + @Test + public void testGetMaxLeavesToBeActivated() + throws SchedulerDynamicEditException { + GuaranteedOrZeroCapacityOverTimePolicy policy = + new GuaranteedOrZeroCapacityOverTimePolicy(); + + assertEquals(1, policy.getMaxLeavesToBeActivated(0.17f, 0.03f, 1)); + assertEquals(5, policy.getMaxLeavesToBeActivated(0.17f, 0.03f, 7)); + assertEquals(0, policy.getMaxLeavesToBeActivated(0, 0.03f, 10)); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 1426e881385..c45bdb4fd0f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -18,6 +18,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; +import static org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager + .NO_LABEL; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler + .capacity.CSQueueUtils.EPSILON; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler + .capacity.CapacitySchedulerConfiguration.DOT; +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler + .capacity.CapacitySchedulerConfiguration.ROOT; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -44,6 +52,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -189,7 +198,7 @@ private void setUpInternal(ResourceCalculator rC) throws Exception { root = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, - CapacitySchedulerConfiguration.ROOT, + ROOT, queues, queues, TestUtils.spyHook); root.updateClusterResource(Resources.createResource(100 * 16 * GB, 100 * 32), @@ -222,12 +231,12 @@ private void setupQueueConfiguration( final String newRoot) { // Define top-level queues - conf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] {newRoot}); - conf.setMaximumCapacity(CapacitySchedulerConfiguration.ROOT, 100); - conf.setAcl(CapacitySchedulerConfiguration.ROOT, + conf.setQueues(ROOT, new String[] {newRoot}); + conf.setMaximumCapacity(ROOT, 100); + conf.setAcl(ROOT, QueueACL.SUBMIT_APPLICATIONS, " "); - final 
String Q_newRoot = CapacitySchedulerConfiguration.ROOT + "." + newRoot; + final String Q_newRoot = ROOT + "." + newRoot; conf.setQueues(Q_newRoot, new String[] {A, B, C, D, E}); conf.setCapacity(Q_newRoot, 100); conf.setMaximumCapacity(Q_newRoot, 100); @@ -410,7 +419,7 @@ public void testPolicyConfiguration() throws Exception { CapacitySchedulerConfiguration testConf = new CapacitySchedulerConfiguration(); - String tproot = CapacitySchedulerConfiguration.ROOT + "." + + String tproot = ROOT + "." + "testPolicyRoot" + System.currentTimeMillis(); OrderingPolicy comPol = @@ -485,7 +494,7 @@ public void testFairConfiguration() throws Exception { CapacitySchedulerConfiguration testConf = new CapacitySchedulerConfiguration(); - String tproot = CapacitySchedulerConfiguration.ROOT + "." + + String tproot = ROOT + "." + "testPolicyRoot" + System.currentTimeMillis(); OrderingPolicy schedOrder = @@ -722,12 +731,12 @@ public void testDRFUsageRatioRounding() throws Exception { Priority priority = TestUtils.createMockPriority(1); app0.updateResourceRequests(Collections.singletonList(TestUtils .createResourceRequest(ResourceRequest.ANY, 20 * GB, 29, 1, true, - priority, recordFactory, RMNodeLabelsManager.NO_LABEL))); + priority, recordFactory, NO_LABEL))); assign = b.assignContainers(clusterResource, node0, new ResourceLimits( clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); app0.updateResourceRequests(Collections.singletonList(TestUtils .createResourceRequest(ResourceRequest.ANY, 10 * GB, 29, 2, true, - priority, recordFactory, RMNodeLabelsManager.NO_LABEL))); + priority, recordFactory, NO_LABEL))); assign = b.assignContainers(clusterResource, node0, new ResourceLimits( clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); assertTrue("Still within limits, should assign", @@ -796,11 +805,11 @@ public void testDRFUserLimits() throws Exception { Priority priority = TestUtils.createMockPriority(1); 
app0.updateResourceRequests(Collections.singletonList(TestUtils .createResourceRequest(ResourceRequest.ANY, 1 * GB, 40, 10, true, - priority, recordFactory, RMNodeLabelsManager.NO_LABEL))); + priority, recordFactory, NO_LABEL))); app2.updateResourceRequests(Collections.singletonList(TestUtils .createResourceRequest(ResourceRequest.ANY, 2 * GB, 10, 10, true, - priority, recordFactory, RMNodeLabelsManager.NO_LABEL))); + priority, recordFactory, NO_LABEL))); /** * Start testing... @@ -2277,7 +2286,7 @@ public void testRackLocalityDelayScheduling() throws Exception { CapacitySchedulerConfiguration.RACK_LOCALITY_ADDITIONAL_DELAY, 1); Map newQueues = new HashMap(); CSQueue newRoot = CapacitySchedulerQueueManager.parseQueue(csContext, - csConf, null, CapacitySchedulerConfiguration.ROOT, newQueues, queues, + csConf, null, ROOT, newQueues, queues, TestUtils.spyHook); root.reinitialize(newRoot, cs.getClusterResource()); @@ -2712,9 +2721,12 @@ public void testActivateApplicationAfterQueueRefresh() throws Exception { CapacitySchedulerConfiguration.DEFAULT_MAXIMUM_APPLICATIONMASTERS_RESOURCE_PERCENT * 2); Map newQueues = new HashMap(); - CSQueue newRoot = CapacitySchedulerQueueManager.parseQueue(csContext, - csConf, null, CapacitySchedulerConfiguration.ROOT, newQueues, queues, - TestUtils.spyHook); + CSQueue newRoot = + CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, + ROOT, + newQueues, queues, + TestUtils.spyHook); + queues = newQueues; root.reinitialize(newRoot, csContext.getClusterResource()); // after reinitialization @@ -2738,7 +2750,7 @@ public void testLocalityDelaysAfterQueueRefresh() throws Exception { Map newQueues = new HashMap(); CSQueue newRoot = CapacitySchedulerQueueManager.parseQueue(csContext, csConf, null, - CapacitySchedulerConfiguration.ROOT, + ROOT, newQueues, queues, TestUtils.spyHook); root.reinitialize(newRoot, cs.getClusterResource()); @@ -3621,7 +3633,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() 
assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0 * GB, e.getTotalPendingResourcesConsideringUserLimit(clusterResource, - RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + NO_LABEL, false).getMemorySize()); // Assign 2nd container of 1GB applyCSAssignment(clusterResource, @@ -3635,7 +3647,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // Can't allocate 3rd container due to user-limit. Headroom still 0. applyCSAssignment(clusterResource, @@ -3645,7 +3657,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() assertEquals(2*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // Increase user-limit-factor from 1GB to 10GB (1% * 10 * 100GB = 10GB). 
// Pending for both app_0 and app_1 are still 3GB, so user-limit-factor @@ -3653,7 +3665,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() // getTotalPendingResourcesConsideringUserLimit() e.setUserLimitFactor(10.0f); assertEquals(3*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); applyCSAssignment(clusterResource, e.assignContainers(clusterResource, node_0, @@ -3663,7 +3675,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // Get the last 2 containers for app_1, no more pending requests. applyCSAssignment(clusterResource, @@ -3677,7 +3689,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitOneUser() assertEquals(3*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_1.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { @@ -3788,7 +3800,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // With queue capacity set at 1% of 100GB and user-limit-factor set to 1.0, // queue 'e' should be able to consume 1GB per user. 
assertEquals(2*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // None of the apps have assigned resources // user_0's apps: assertEquals(0*GB, app_0.getCurrentConsumption().getMemorySize()); @@ -3805,7 +3817,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // The first container was assigned to user_0's app_0. Queues total headroom // has 1GB left for user_1. assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3823,7 +3835,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // this container went to user_0's app_1. so, headroom for queue 'e'e is // still 1GB for user_1 assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3839,7 +3851,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // Container was allocated to user_1's app_2 since user_1, Now, no headroom // is left. 
assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3855,7 +3867,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // Allocated to user_1's app_2 since scheduler allocates 1 container // above user resource limit. Available headroom still 0. assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: long app_0_consumption = app_0.getCurrentConsumption().getMemorySize(); assertEquals(1*GB, app_0_consumption); @@ -3875,7 +3887,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // Cannot allocate 5th container because both users are above their allowed // user resource limit. Values should be the same as previously. assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(app_0_consumption, app_0.getCurrentConsumption().getMemorySize()); assertEquals(app_1_consumption, app_1.getCurrentConsumption().getMemorySize()); @@ -3894,7 +3906,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), e, nodes, apps); // Next container goes to user_0's app_1, since it still wanted 1GB. 
assertEquals(1*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); // user_0's apps: assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_1.getCurrentConsumption().getMemorySize()); @@ -3909,7 +3921,7 @@ public void testGetTotalPendingResourcesConsideringUserLimitTwoUsers() // Last container goes to user_1's app_3, since it still wanted 1GB. // user_0's apps: assertEquals(0*GB, e.getTotalPendingResourcesConsideringUserLimit( - clusterResource, RMNodeLabelsManager.NO_LABEL, false).getMemorySize()); + clusterResource, NO_LABEL, false).getMemorySize()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemorySize()); assertEquals(2*GB, app_1.getCurrentConsumption().getMemorySize()); // user_1's apps: @@ -4027,6 +4039,59 @@ public void testApplicationQueuePercent() app.getResourceUsageReport().getClusterUsagePercentage(), 0.01f); } + @Test + public void testSetupQueueConfigsWithSpecifiedConfiguration() + throws IOException { + + try { + CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration( + new Configuration(false), false); + + final String leafQueueName = + "testSetupQueueConfigsWithSpecifiedConfiguration"; + + assertEquals(0, conf.size()); + conf.setNodeLocalityDelay(60); + conf.setCapacity(ROOT + DOT + leafQueueName, 10); + conf.setMaximumCapacity(ROOT + DOT + leafQueueName, 100); + conf.setUserLimitFactor(ROOT + DOT +leafQueueName, 0.1f); + + csConf.setNodeLocalityDelay(30); + csConf.setGlobalMaximumApplicationsPerQueue(20); + + LeafQueue leafQueue = new LeafQueue(csContext, conf, + leafQueueName, cs.getRootQueue(), + null); + + assertEquals(30, leafQueue.getNodeLocalityDelay()); + assertEquals(20, leafQueue.getMaxApplications()); + assertEquals(2, leafQueue.getMaxApplicationsPerUser()); + + //check queue configs + 
conf.setMaximumAMResourcePercentPerPartition(leafQueue.getQueueName(), + NO_LABEL, 10); + conf.setMaximumCapacity(leafQueue.getQueueName(), 10); + + assertEquals(0.1, leafQueue.getMaxAMResourcePerQueuePercent(), + EPSILON); + assertEquals(1, leafQueue.getMaximumCapacity(), + EPSILON); + assertEquals(0.1, leafQueue.getCapacity(), + EPSILON); + assertEquals(0.1, leafQueue.getAbsoluteCapacity(), + EPSILON); + assertEquals(1.0, leafQueue.getAbsoluteMaximumCapacity(), + EPSILON); + + } finally { + //revert config changes + csConf.setNodeLocalityDelay( + CapacitySchedulerConfiguration.DEFAULT_NODE_LOCALITY_DELAY); + csConf.setGlobalMaximumApplicationsPerQueue( + (int) CapacitySchedulerConfiguration.UNDEFINED); + } + } + private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) { ApplicationId appIdImpl = ApplicationId.newInstance(0, appId); ApplicationAttemptId attId = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueManagementDynamicEditPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueManagementDynamicEditPolicy.java new file mode 100644 index 00000000000..4dc56fb314b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueManagementDynamicEditPolicy.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .queuemanagement.GuaranteedOrZeroCapacityOverTimePolicy; +import org.junit.Before; +import org.junit.Test; + + +import static org.apache.hadoop.yarn.server.resourcemanager.scheduler + .capacity.CSQueueUtils.EPSILON; +import static org.junit.Assert.assertEquals; + +public class TestQueueManagementDynamicEditPolicy extends + TestCapacitySchedulerAutoCreatedQueueBase { + private QueueManagementDynamicEditPolicy policy = new + QueueManagementDynamicEditPolicy(); + + @Before + public void setUp() throws Exception { + super.setUp(); + policy.init(cs.getConfiguration(), cs.getRMContext(), cs); + } + + @Test + public void testEditSchedule() throws Exception { + + try { + policy.editSchedule(); + assertEquals(2, policy.getManagedParentQueues().size()); + + CSQueue parentQueue = cs.getQueue(PARENT_QUEUE); + + GuaranteedOrZeroCapacityOverTimePolicy autoCreatedQueueManagementPolicy = + (GuaranteedOrZeroCapacityOverTimePolicy) ((ManagedParentQueue) + parentQueue) + .getAutoCreatedQueueManagementPolicy(); + assertEquals(0f, autoCreatedQueueManagementPolicy + .getAbsoluteActivatedChildQueueCapacity(), EPSILON); + + 
//submit app1 as USER1 + ApplicationId user1AppId = submitApp(mockRM, parentQueue, USER1, USER1, 1, + 1); + validateInitialQueueEntitlement(parentQueue, USER1, 0.1f); + + //submit another app2 as USER2 + ApplicationId user2AppId = submitApp(mockRM, parentQueue, USER2, USER2, 2, + 1); + validateInitialQueueEntitlement(parentQueue, USER2, 0.2f); + + //validate total activated abs capacity + assertEquals(0.2f, autoCreatedQueueManagementPolicy + .getAbsoluteActivatedChildQueueCapacity(), EPSILON); + + //submit user_3 app. This cant be scheduled since there is no capacity + submitApp(mockRM, parentQueue, USER3, USER3, 3, 1); + final CSQueue user3LeafQueue = cs.getQueue(USER3); + validateCapacities((AutoCreatedLeafQueue) user3LeafQueue, 0.0f, 0.0f, + 1.0f, 1.0f); + + assertEquals(autoCreatedQueueManagementPolicy + .getAbsoluteActivatedChildQueueCapacity(), 0.2f, EPSILON); + + //deactivate USER2 queue + cs.killAllAppsInQueue(USER2); + mockRM.waitForState(user2AppId, RMAppState.KILLED); + + //deactivate USER1 queue + cs.killAllAppsInQueue(USER1); + mockRM.waitForState(user1AppId, RMAppState.KILLED); + + policy.editSchedule(); + + waitForPolicyState(0.1f, autoCreatedQueueManagementPolicy, 1000); + + validateCapacities((AutoCreatedLeafQueue) user3LeafQueue, 0.5f, 0.1f, + 1.0f, 1.0f); + + validateCapacitiesByLabel((ManagedParentQueue) parentQueue, (AutoCreatedLeafQueue) user3LeafQueue, + NODEL_LABEL_GPU); + + } finally { + cleanupQueue(USER1); + cleanupQueue(USER2); + cleanupQueue(USER3); + } + } + + private void waitForPolicyState(float expectedVal, + GuaranteedOrZeroCapacityOverTimePolicy queueManagementPolicy, int + timesec) throws + InterruptedException { + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < timesec * 1000) { + if (Float.compare(expectedVal, queueManagementPolicy + .getAbsoluteActivatedChildQueueCapacity()) != 0) { + Thread.sleep(100); + } else { + break; + } + } + } +} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAutoCreatedLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java similarity index 95% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAutoCreatedLeafQueue.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java index b403e724533..f6b4f2a31d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestAutoCreatedLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservationQueue.java @@ -38,9 +38,9 @@ /** * Test class for dynamic auto created leaf queues. 
- * @see AutoCreatedLeafQueue + * @see ReservationQueue */ -public class TestAutoCreatedLeafQueue { +public class TestReservationQueue { private CapacitySchedulerConfiguration csConf; private CapacitySchedulerContext csContext; @@ -48,12 +48,13 @@ final static int GB = 1024; private final ResourceCalculator resourceCalculator = new DefaultResourceCalculator(); - private AutoCreatedLeafQueue autoCreatedLeafQueue; + private ReservationQueue autoCreatedLeafQueue; @Before public void setup() throws IOException { // setup a context / conf csConf = new CapacitySchedulerConfiguration(); + YarnConfiguration conf = new YarnConfiguration(); csContext = mock(CapacitySchedulerContext.class); when(csContext.getConfiguration()).thenReturn(csConf); @@ -70,7 +71,7 @@ public void setup() throws IOException { // create a queue PlanQueue pq = new PlanQueue(csContext, "root", null, null); - autoCreatedLeafQueue = new AutoCreatedLeafQueue(csContext, "a", pq); + autoCreatedLeafQueue = new ReservationQueue(csContext, "a", pq); } private void validateAutoCreatedLeafQueue(double capacity) {