diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java index 0a2fa3a..0600b83 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java @@ -174,6 +174,9 @@ public static void updateQueueStatistics( public static float getAbsoluteMaxAvailCapacity( ResourceCalculator resourceCalculator, Resource clusterResource, CSQueue queue) { + //Max avail capacity needs to take into account usage by ancestor-siblings + //which are greater than their base capacity, so we are interested in + //"max avail" capacity CSQueue parent = queue.getParent(); if (parent == null) { return queue.getAbsoluteMaximumCapacity(); @@ -189,15 +192,25 @@ public static float getAbsoluteMaxAvailCapacity( if (Resources.isInvalidDivisor(resourceCalculator, parentResource)) { return 0.0f; } + + Resource queueUsedResources; + float queueMaxCap; + synchronized (queue) { + queueUsedResources = queue.getUsedResources(); + queueMaxCap = queue.getMaximumCapacity(); + } + + Resource parentUsedResources = parent.getUsedResources(); + //sibling used is parent used - my used... float siblingUsedCapacity = Resources.ratio( resourceCalculator, - Resources.subtract(parent.getUsedResources(), queue.getUsedResources()), + Resources.subtract(parentUsedResources, queueUsedResources), parentResource); //my max avail is the lesser of my max capacity and what is unused from my parent //by my siblings (if they are beyond their base capacity) float maxAvail = Math.min( - queue.getMaximumCapacity(), + queueMaxCap, 1.0f - siblingUsedCapacity); //and, mutiply by parent to get absolute (cluster relative) value float absoluteMaxAvail = maxAvail * parentMaxAvail; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index ffeec63..b9ab6a4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -115,6 +115,8 @@ private final QueueHeadroomInfo queueHeadroomInfo = new QueueHeadroomInfo(); + private volatile float absoluteMaxAvailCapacity; + public LeafQueue(CapacitySchedulerContext cs, String queueName, CSQueue parent, CSQueue old) throws IOException { super(cs, queueName, parent, old); @@ -133,6 +135,10 @@ public LeafQueue(CapacitySchedulerContext cs, (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100; float absoluteMaxCapacity = CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent); + + // Initially set to absoluteMax, will be updated to more accurate + // max avail value during assignContainers + absoluteMaxAvailCapacity = absoluteMaxCapacity; int userLimit = cs.getConfiguration().getUserLimit(getQueuePath()); float userLimitFactor = @@ -720,8 +726,18 @@ private synchronized FiCaSchedulerApp getApplication( } @Override - public synchronized CSAssignment assignContainers(Resource clusterResource, + public CSAssignment assignContainers(Resource clusterResource, FiCaSchedulerNode node, boolean needToUnreserve) { + //We should not hold a lock on a queue and its parent concurrently - it + //can lead to deadlocks when calls which walk down the tree occur + //concurrently (getQueueInfo...) + absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity( + resourceCalculator, clusterResource, this); + return assignContainersInner(clusterResource, node, needToUnreserve); + } + + private synchronized CSAssignment assignContainersInner( + Resource clusterResource, FiCaSchedulerNode node, boolean needToUnreserve) { if(LOG.isDebugEnabled()) { LOG.debug("assignContainers: node=" + node.getNodeName() @@ -1012,12 +1028,6 @@ Resource computeUserLimitAndSetHeadroom(FiCaSchedulerApp application, computeUserLimit(application, clusterResource, required, queueUser, requestedLabels); - //Max avail capacity needs to take into account usage by ancestor-siblings - //which are greater than their base capacity, so we are interested in "max avail" - //capacity - float absoluteMaxAvailCapacity = CSQueueUtils.getAbsoluteMaxAvailCapacity( - resourceCalculator, clusterResource, this); - Resource queueMaxCap = // Queue Max-Capacity Resources.multiplyAndNormalizeDown( resourceCalculator,