From d006ee5af95e870b128d644f2ac9fa2f8b08cd5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=A8=E6=97=AD=E6=B3=BD?= Date: Mon, 13 Nov 2023 20:23:34 +0800 Subject: [PATCH 1/1] YARN-11603. ResourceManager cannot allocate resources to tasks when a dimension's resources are used up --- .../scheduler/capacity/AbstractLeafQueue.java | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java index 280d3d182fb..1916a27c5a0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java @@ -1253,6 +1253,7 @@ public CSAssignment assignContainers(Resource clusterResource, } // Check user limit boolean userAssignable = true; + cul.canAssign = true; if (!cul.canAssign && Resources.fitsIn(appReserved, cul.reservation)) { userAssignable = false; } else { @@ -1355,6 +1356,14 @@ public boolean accept(Resource cluster, Resources.subtractFrom(usedResource, request.getTotalReleasedResource()); + // If the allocated or reserved resource has a value of 0 for a resource type other than memory or vcores, set that type to 0 in usedResource so it is ignored by the user-limit comparison + Resource resRequest = allocation.getAllocatedOrReservedResource(); + for(ResourceInformation resInfo : resRequest.getResources()){ + if(resInfo.getValue() == 0 && !resInfo.getName().equals(ResourceInformation.MEMORY_URI) &&
!resInfo.getName().equals(ResourceInformation.VCORES_URI)){ + usedResource.setResourceValue(resInfo.getName(), 0); + } + } + if (Resources.greaterThan(resourceCalculator, cluster, usedResource, userLimit)) { LOG.debug("Used resource={} exceeded user-limit={}", @@ -1659,12 +1668,21 @@ protected boolean canAssignToUser(Resource clusterResource, return false; } + Resource resPending = application.getAppAttemptResourceUsage().getPending(); + // If the application's pending resource has a value of 0 for a resource type other than memory or vcores, set that type to 0 in userUsed (a copy of the user's usage) so it is ignored by the user-limit comparison + Resource userUsed = Resource.newInstance(user.getUsed(nodePartition)); + for(ResourceInformation resInfo : resPending.getResources()){ + if(resInfo.getValue() == 0 && !resInfo.getName().equals(ResourceInformation.MEMORY_URI) && !resInfo.getName().equals(ResourceInformation.VCORES_URI)){ + userUsed.setResourceValue(resInfo.getName(), 0); + } + } + currentResourceLimits.setAmountNeededUnreserve(Resources.none()); // Note: We aren't considering the current request since there is a fixed // overhead of the AM, but it's a > check, not a >= check, so... if (Resources.greaterThan(resourceCalculator, clusterResource, - user.getUsed(nodePartition), limit)) { + userUsed, limit)) { // if enabled, check to see if could we potentially use this node instead // of a reserved node if the application has reserved containers if (this.reservationsContinueLooking) { -- 2.18.0.windows.1