diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java index 1dd5586..189d888 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java @@ -113,4 +113,13 @@ public static void updateQueueStatistics( ) ); } + + public static CSQueue getRootQueue(CSQueue current) { + CSQueue parent = current.getParent(); + while (parent != null) { + current = parent; + parent = current.getParent(); + } + return current; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 65938aa..c7b1c47 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -991,6 +991,20 @@ private Resource computeUserLimitAndSetHeadroom( Resources.min(resourceCalculator, clusterResource, userLimit, queueMaxCap), userConsumed); + + + //((CapacityScheduler)scheduler.getRMContext().getScheduler()).getRootQueue().getUsedResources() + //Min headroom against available cluster Reasource, to protect against cases + //where headroom may be mistakenly overreported, leading to task (mapper) starvation + //(due to failure to pre-empt reducers, for example) + Resource clusterHeadroom = Resources.subtract( + clusterResource, + CSQueueUtils.getRootQueue(this).getUsedResources() + ); + if (LOG.isDebugEnabled()) { + LOG.debug(" clusterHeadroom=" + clusterHeadroom); + } + headroom = Resources.min(resourceCalculator, clusterResource, headroom, clusterHeadroom); if (LOG.isDebugEnabled()) { LOG.debug("Headroom calculation for user " + user + ": " +