diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 228c6af..2339c79 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -22,6 +22,7 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -589,7 +590,8 @@ public ApplicationSubmissionContext createApplicationSubmissionContext( amResourceRequest.setCapability(capability); amResourceRequest.setNumContainers(1); amResourceRequest.setNodeLabelExpression(amNodelabelExpression.trim()); - appContext.setAMContainerResourceRequest(amResourceRequest); + appContext.setAMContainerResourceRequests( + Collections.singletonList(amResourceRequest)); } // set labels for the Job containers appContext.setNodeLabelExpression(jobConf diff --git hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java index 279c8ce..c2bda62 100644 --- hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java +++ hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java @@ -571,7 +571,7 @@ public void testNodeLabelExp() throws Exception { 
buildSubmitContext(yarnRunner, jobConf); assertEquals(appSubCtx.getNodeLabelExpression(), "GPU"); - assertEquals(appSubCtx.getAMContainerResourceRequest() + assertEquals(appSubCtx.getAMContainerResourceRequests().get(0) .getNodeLabelExpression(), "highMem"); } diff --git hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index df8323a..377cbef 100644 --- hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -892,6 +892,11 @@ public int getNumClusterNodes() { } @Override + public List getClusterNodeIdsByResourceName(String resourceName) { + return nodeTracker.getNodeIdsByResourceName(resourceName); + } + + @Override public SchedulerNodeReport getNodeReport(NodeId nodeId) { return scheduler.getNodeReport(nodeId); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java index e562aaa..6fe9ec1 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationSubmissionContext.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.api.records; +import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Set; @@ -100,7 +102,7 @@ public static ApplicationSubmissionContext newInstance( amReq.setNumContainers(1); amReq.setRelaxLocality(true); amReq.setNodeLabelExpression(amContainerLabelExpression); - 
context.setAMContainerResourceRequest(amReq); + context.setAMContainerResourceRequests(Collections.singletonList(amReq)); return context; } @@ -159,7 +161,8 @@ public static ApplicationSubmissionContext newInstance( context.setApplicationType(applicationType); context.setKeepContainersAcrossApplicationAttempts(keepContainers); context.setNodeLabelExpression(appLabelExpression); - context.setAMContainerResourceRequest(resourceRequest); + context.setAMContainerResourceRequests( + Collections.singletonList(resourceRequest)); return context; } @@ -454,30 +457,58 @@ public abstract void setKeepContainersAcrossApplicationAttempts( public abstract void setNodeLabelExpression(String nodeLabelExpression); /** - * Get ResourceRequest of AM container, if this is not null, scheduler will - * use this to acquire resource for AM container. - * + * Get the ResourceRequest of the AM container; if this is not null, + * scheduler will use this to acquire resource for AM container. + * * If this is null, scheduler will assemble a ResourceRequest by using * getResource and getPriority of * ApplicationSubmissionContext. - * - * Number of containers and Priority will be ignore. - * - * @return ResourceRequest of AM container + * + * Number of containers and Priority will be ignored. + * + * @return ResourceRequest of the AM container + * @deprecated See {@link #getAMContainerResourceRequests()} */ @Public @Evolving + @Deprecated public abstract ResourceRequest getAMContainerResourceRequest(); /** - * Set ResourceRequest of AM container - * @param request of AM container + * Set ResourceRequest of the AM container + * @param request of the AM container + * @deprecated See {@link #setAMContainerResourceRequests(List)} */ @Public @Evolving + @Deprecated public abstract void setAMContainerResourceRequest(ResourceRequest request); /** + * Get the ResourceRequests of the AM container; if this is not null, + * scheduler will use this to acquire resource for AM container. 
+ * + * If this is null, scheduler will use the ResourceRequest as determined by + * getAMContainerResourceRequest and its behavior. + * + * Number of containers and Priority will be ignored. + * + * @return List of ResourceRequests of the AM container + */ + @Public + @Evolving + public abstract List getAMContainerResourceRequests(); + + /** + * Set ResourceRequests of the AM container. + * @param requests of the AM container + */ + @Public + @Evolving + public abstract void setAMContainerResourceRequests( + List requests); + + /** * Get the attemptFailuresValidityInterval in milliseconds for the application * * @return the attemptFailuresValidityInterval diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 3b26a5c..587354a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -378,7 +378,7 @@ message ApplicationSubmissionContextProto { optional LogAggregationContextProto log_aggregation_context = 14; optional ReservationIdProto reservation_id = 15; optional string node_label_expression = 16; - optional ResourceRequestProto am_container_resource_request = 17; + repeated ResourceRequestProto am_container_resource_request = 17; repeated ApplicationTimeoutMapProto application_timeouts = 18; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java index 62b54e7..1a6719a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.api.records.impl.pb; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -66,7 +68,7 @@ private ContainerLaunchContext amContainer = null; private Resource resource = null; private Set applicationTags = null; - private ResourceRequest amResourceRequest = null; + private List amResourceRequests = null; private LogAggregationContext logAggregationContext = null; private ReservationId reservationId = null; private Map applicationTimeouts = null; @@ -127,9 +129,10 @@ private void mergeLocalToBuilder() { builder.clearApplicationTags(); builder.addAllApplicationTags(this.applicationTags); } - if (this.amResourceRequest != null) { - builder.setAmContainerResourceRequest( - convertToProtoFormat(this.amResourceRequest)); + if (this.amResourceRequests != null) { + builder.clearAmContainerResourceRequest(); + builder.addAllAmContainerResourceRequest( + convertToProtoFormat(this.amResourceRequests)); } if (this.logAggregationContext != null) { builder.setLogAggregationContext( @@ -430,13 +433,23 @@ private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { private PriorityProto convertToProtoFormat(Priority t) { return ((PriorityPBImpl)t).getProto(); } - - private ResourceRequestPBImpl convertFromProtoFormat(ResourceRequestProto p) { - return new ResourceRequestPBImpl(p); + + private List convertFromProtoFormat( + List ps) { + List rs = new ArrayList<>(); + for (ResourceRequestProto p : ps) { + rs.add(new ResourceRequestPBImpl(p)); + } + return rs; } - private ResourceRequestProto convertToProtoFormat(ResourceRequest t) { - return ((ResourceRequestPBImpl)t).getProto(); + private List convertToProtoFormat( + List ts) { + List rs = new ArrayList<>(ts.size()); + for (ResourceRequest t : ts) { 
+ rs.add(((ResourceRequestPBImpl)t).getProto()); + } + return rs; } private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) { @@ -485,25 +498,46 @@ public void setNodeLabelExpression(String labelExpression) { } @Override + @Deprecated public ResourceRequest getAMContainerResourceRequest() { + List reqs = getAMContainerResourceRequests(); + if (reqs == null || reqs.isEmpty()) { + return null; + } + return getAMContainerResourceRequests().get(0); + } + + @Override + public List getAMContainerResourceRequests() { ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder; - if (this.amResourceRequest != null) { - return amResourceRequest; + if (this.amResourceRequests != null) { + return amResourceRequests; } // Else via proto - if (!p.hasAmContainerResourceRequest()) { + if (p.getAmContainerResourceRequestCount() == 0) { return null; } - amResourceRequest = convertFromProtoFormat(p.getAmContainerResourceRequest()); - return amResourceRequest; + amResourceRequests = + convertFromProtoFormat(p.getAmContainerResourceRequestList()); + return amResourceRequests; } @Override + @Deprecated public void setAMContainerResourceRequest(ResourceRequest request) { maybeInitBuilder(); if (request == null) { builder.clearAmContainerResourceRequest(); } - this.amResourceRequest = request; + this.amResourceRequests = Collections.singletonList(request); + } + + @Override + public void setAMContainerResourceRequests(List requests) { + maybeInitBuilder(); + if (requests == null) { + builder.clearAmContainerResourceRequest(); + } + this.amResourceRequests = requests; } @Override diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/RMNodeLabel.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/RMNodeLabel.java index feeeaf1..7fee7be 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/RMNodeLabel.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/RMNodeLabel.java @@ -29,28 +29,28 @@ public class RMNodeLabel implements Comparable { private Resource resource; - private int numActiveNMs; private String labelName; private Set nodeIds; + private Set activeNodeIds; private boolean exclusive; private NodeLabel nodeLabel; public RMNodeLabel(NodeLabel nodeLabel) { - this(nodeLabel.getName(), Resource.newInstance(0, 0), 0, + this(nodeLabel.getName(), Resource.newInstance(0, 0), new HashSet<>(), nodeLabel.isExclusive()); } public RMNodeLabel(String labelName) { - this(labelName, Resource.newInstance(0, 0), 0, + this(labelName, Resource.newInstance(0, 0), new HashSet<>(), NodeLabel.DEFAULT_NODE_LABEL_EXCLUSIVITY); } - protected RMNodeLabel(String labelName, Resource res, int activeNMs, - boolean exclusive) { + protected RMNodeLabel(String labelName, Resource res, + Set activeNodeIds, boolean exclusive) { this.labelName = labelName; this.resource = res; - this.numActiveNMs = activeNMs; - this.nodeIds = new HashSet(); + this.nodeIds = new HashSet<>(); + this.activeNodeIds = activeNodeIds; this.exclusive = exclusive; this.nodeLabel = NodeLabel.newInstance(labelName, exclusive); } @@ -67,14 +67,14 @@ public void removeNodeId(NodeId node) { return new HashSet(nodeIds); } - public void addNode(Resource nodeRes) { - Resources.addTo(resource, nodeRes); - numActiveNMs++; + public void addNode(CommonNodeLabelsManager.Node node) { + Resources.addTo(resource, node.resource); + activeNodeIds.add(node.nodeId); } - public void removeNode(Resource nodeRes) { - Resources.subtractFrom(resource, nodeRes); - numActiveNMs--; + public void removeNode(CommonNodeLabelsManager.Node node) { + Resources.subtractFrom(resource, node.resource); + activeNodeIds.remove(node.nodeId); } public Resource getResource() { @@ -82,7 +82,11 @@ 
public Resource getResource() { } public int getNumActiveNMs() { - return numActiveNMs; + return activeNodeIds.size(); + } + + public Set getActiveNodeIds() { + return new HashSet<>(activeNodeIds); } public String getLabelName() { @@ -98,7 +102,7 @@ public boolean getIsExclusive() { } public RMNodeLabel getCopy() { - return new RMNodeLabel(labelName, resource, numActiveNMs, exclusive); + return new RMNodeLabel(labelName, resource, activeNodeIds, exclusive); } public NodeLabel getNodeLabel() { @@ -124,7 +128,7 @@ public boolean equals(Object obj) { RMNodeLabel other = (RMNodeLabel) obj; return Resources.equals(resource, other.getResource()) && StringUtils.equals(labelName, other.getLabelName()) - && (other.getNumActiveNMs() == numActiveNMs); + && (activeNodeIds.equals(other.getActiveNodeIds())); } return false; } @@ -133,6 +137,6 @@ public boolean equals(Object obj) { public int hashCode() { final int prime = 502357; return (int) ((((long) labelName.hashCode() << 8) - + (resource.hashCode() << 4) + numActiveNMs) % prime); + + (resource.hashCode() << 4) + activeNodeIds.hashCode()) % prime); } } \ No newline at end of file diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index e211867..f9422ec 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.yarn.server.resourcemanager; +import java.util.Collections; import java.util.LinkedList; +import 
java.util.List; import java.util.Map; import org.apache.commons.logging.Log; @@ -31,6 +33,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationTimeoutType; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -337,14 +341,16 @@ protected void recoverApplication(ApplicationStateData appState, // has been disabled. Reject the recovery of this application if it // is true and give clear message so that user can react properly. if (!appContext.getUnmanagedAM() && - application.getAMResourceRequest() == null && + (application.getAMResourceRequests() == null || + application.getAMResourceRequests().isEmpty()) && !YarnConfiguration.areNodeLabelsEnabled(this.conf)) { // check application submission context and see if am resource request // or application itself contains any node label expression. - ResourceRequest amReqFromAppContext = - appContext.getAMContainerResourceRequest(); - String labelExp = (amReqFromAppContext != null) ? - amReqFromAppContext.getNodeLabelExpression() : null; + List amReqsFromAppContext = + appContext.getAMContainerResourceRequests(); + String labelExp = + (amReqsFromAppContext != null && !amReqsFromAppContext.isEmpty()) ? 
+ amReqsFromAppContext.get(0).getNodeLabelExpression() : null; if (labelExp == null) { labelExp = appContext.getNodeLabelExpression(); } @@ -379,9 +385,9 @@ private RMAppImpl createAndPopulateNewRMApp( } ApplicationId applicationId = submissionContext.getApplicationId(); - ResourceRequest amReq = null; + List amReqs = null; try { - amReq = validateAndCreateResourceRequest(submissionContext, isRecovery); + amReqs = validateAndCreateResourceRequest(submissionContext, isRecovery); } catch (InvalidLabelResourceRequestException e) { // This can happen if the application had been submitted and run // with Node Label enabled but recover with Node Label disabled. @@ -444,7 +450,7 @@ private RMAppImpl createAndPopulateNewRMApp( submissionContext.getQueue(), submissionContext, this.scheduler, this.masterService, submitTime, submissionContext.getApplicationType(), - submissionContext.getApplicationTags(), amReq, startTime); + submissionContext.getApplicationTags(), amReqs, startTime); // Concurrent app submissions with same applicationId will fail here // Concurrent app submissions with different applicationIds will not // influence each other @@ -470,7 +476,7 @@ private RMAppImpl createAndPopulateNewRMApp( return application; } - private ResourceRequest validateAndCreateResourceRequest( + private List validateAndCreateResourceRequest( ApplicationSubmissionContext submissionContext, boolean isRecovery) throws InvalidResourceRequestException { // Validation of the ApplicationSubmissionContext needs to be completed @@ -480,33 +486,71 @@ private ResourceRequest validateAndCreateResourceRequest( // Check whether AM resource requirements are within required limits if (!submissionContext.getUnmanagedAM()) { - ResourceRequest amReq = submissionContext.getAMContainerResourceRequest(); - if (amReq == null) { - amReq = BuilderUtils - .newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY, - ResourceRequest.ANY, submissionContext.getResource(), 1); - } - - // set label expression 
for AM container - if (null == amReq.getNodeLabelExpression()) { - amReq.setNodeLabelExpression(submissionContext - .getNodeLabelExpression()); + List amReqs = + submissionContext.getAMContainerResourceRequests(); + if (amReqs == null || amReqs.isEmpty()) { + if (submissionContext.getResource() != null) { + amReqs = Collections.singletonList(BuilderUtils + .newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY, + ResourceRequest.ANY, submissionContext.getResource(), 1)); + } else { + throw new InvalidResourceRequestException("Invalid resource request, " + + "no resources requested"); + } } try { - SchedulerUtils.normalizeAndValidateRequest(amReq, - scheduler.getMaximumResourceCapability(), - submissionContext.getQueue(), scheduler, isRecovery, rmContext); + // Find the ANY request and ensure there's only one + ResourceRequest anyReq = null; + for (ResourceRequest amReq : amReqs) { + if (amReq.getResourceName().equals(ResourceRequest.ANY)) { + if (anyReq == null) { + anyReq = amReq; + } else { + throw new InvalidResourceRequestException("Invalid resource " + + "request, only one resource request with " + + ResourceRequest.ANY + " is allowed"); + } + } + } + if (anyReq == null) { + throw new InvalidResourceRequestException("Invalid resource request, " + + "no resource request specified with " + ResourceRequest.ANY); + } + + // Make sure that all of the requests agree with the ANY request + // and have correct values + for (ResourceRequest amReq : amReqs) { + amReq.setCapability(anyReq.getCapability()); + amReq.setExecutionTypeRequest( + ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED)); + amReq.setNumContainers(1); + amReq.setPriority(RMAppAttemptImpl.AM_CONTAINER_PRIORITY); + } + + // set label expression for AM ANY request if not set + if (null == anyReq.getNodeLabelExpression()) { + anyReq.setNodeLabelExpression(submissionContext + .getNodeLabelExpression()); + } + + // Normalize all requests + for (ResourceRequest amReq : amReqs) { + 
SchedulerUtils.normalizeAndValidateRequest(amReq, + scheduler.getMaximumResourceCapability(), + submissionContext.getQueue(), scheduler, isRecovery, rmContext); + + amReq.setCapability( + scheduler.getNormalizedResource(amReq.getCapability())); + } + return amReqs; } catch (InvalidResourceRequestException e) { LOG.warn("RM app submission failed in validating AM resource request" + " for application " + submissionContext.getApplicationId(), e); throw e; } - - amReq.setCapability(scheduler.getNormalizedResource(amReq.getCapability())); - return amReq; } - + return null; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java index 0aa7a2c..47c9b60 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Set; +import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.AccessControlException; @@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerUpdateType; import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; @@ -557,19 +559,38 @@ public static void 
validateApplicationTimeouts( * * @param rmContext context * @param conf configuration - * @param amreq am resource request + * @param amReqs am resource requests * @return applicable node count */ public static int getApplicableNodeCountForAM(RMContext rmContext, - Configuration conf, ResourceRequest amreq) { + Configuration conf, List amReqs) { + Set nodesForReqs = new HashSet<>(); + for (ResourceRequest amReq : amReqs) { + if (amReq.getRelaxLocality() && + !amReq.getResourceName().equals(ResourceRequest.ANY)) { + nodesForReqs.addAll( + rmContext.getScheduler().getClusterNodeIdsByResourceName( + amReq.getResourceName())); + } + } + if (YarnConfiguration.areNodeLabelsEnabled(conf)) { RMNodeLabelsManager labelManager = rmContext.getNodeLabelManager(); - String amNodeLabelExpression = amreq.getNodeLabelExpression(); + String amNodeLabelExpression = amReqs.get(0).getNodeLabelExpression(); amNodeLabelExpression = (amNodeLabelExpression == null || amNodeLabelExpression.trim().isEmpty()) ? RMNodeLabelsManager.NO_LABEL : amNodeLabelExpression; - return labelManager.getActiveNMCountPerLabel(amNodeLabelExpression); + Set nodesForLabel = + labelManager.getActiveNMPerLabel(amNodeLabelExpression); + if (nodesForReqs.isEmpty()) { + return nodesForLabel.size(); + } + return Sets.intersection(nodesForLabel, nodesForReqs).size(); + } + + if (nodesForReqs.isEmpty()) { + return rmContext.getScheduler().getNumClusterNodes(); } - return rmContext.getScheduler().getNumClusterNodes(); + return nodesForReqs.size(); } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java index effe422..128510a 100644 --- 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java @@ -366,6 +366,24 @@ public int getActiveNMCountPerLabel(String label) { } } + /* + * Get active node based on label. + */ + public Set getActiveNMPerLabel(String label) { + if (label == null) { + return Collections.EMPTY_SET; + } + try { + readLock.lock(); + RMNodeLabel labelInfo = labelCollections.get(label); + return (labelInfo == null) + ? Collections.EMPTY_SET + : labelInfo.getActiveNodeIds(); + } finally { + readLock.unlock(); + } + } + public Set getLabelsOnNode(NodeId nodeId) { try { readLock.lock(); @@ -440,7 +458,7 @@ private void updateResourceMappings(Map before, if (oldLabels.isEmpty()) { // update labels RMNodeLabel label = labelCollections.get(NO_LABEL); - label.removeNode(oldNM.resource); + label.removeNode(oldNM); // update queues, all queue can access this node for (Queue q : queueCollections.values()) { @@ -453,7 +471,7 @@ private void updateResourceMappings(Map before, if (null == label) { continue; } - label.removeNode(oldNM.resource); + label.removeNode(oldNM); } // update queues, only queue can access this node will be subtract @@ -475,7 +493,7 @@ private void updateResourceMappings(Map before, if (newLabels.isEmpty()) { // update labels RMNodeLabel label = labelCollections.get(NO_LABEL); - label.addNode(newNM.resource); + label.addNode(newNM); // update queues, all queue can access this node for (Queue q : queueCollections.values()) { @@ -485,7 +503,7 @@ private void updateResourceMappings(Map before, // update labels for (String labelName : newLabels) { RMNodeLabel label = labelCollections.get(labelName); - label.addNode(newNM.resource); + label.addNode(newNM); } // update 
queues, only queue can access this node will be subtract diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index b3a87a6..43fd1fb 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.Set; @@ -269,7 +270,7 @@ ApplicationReport createAndGetApplicationReport(String clientUserName, ReservationId getReservationId(); - ResourceRequest getAMResourceRequest(); + List getAMResourceRequests(); Map getLogAggregationReportsForApp(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 9f00b2e..69c292a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -195,7 +195,7 @@ private RMAppEvent eventCausingFinalSaving; private RMAppState targetedFinalState; private 
RMAppState recoveredFinalState; - private ResourceRequest amReq; + private List amReqs; private CallerContext callerContext; @@ -423,10 +423,10 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext, ApplicationSubmissionContext submissionContext, YarnScheduler scheduler, ApplicationMasterService masterService, long submitTime, String applicationType, Set applicationTags, - ResourceRequest amReq) { + List amReqs) { this(applicationId, rmContext, config, name, user, queue, submissionContext, scheduler, masterService, submitTime, applicationType, applicationTags, - amReq, -1); + amReqs, -1); } public RMAppImpl(ApplicationId applicationId, RMContext rmContext, @@ -434,7 +434,7 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext, ApplicationSubmissionContext submissionContext, YarnScheduler scheduler, ApplicationMasterService masterService, long submitTime, String applicationType, Set applicationTags, - ResourceRequest amReq, long startTime) { + List amReqs, long startTime) { this.systemClock = SystemClock.getInstance(); @@ -457,7 +457,7 @@ public RMAppImpl(ApplicationId applicationId, RMContext rmContext, } this.applicationType = applicationType; this.applicationTags = applicationTags; - this.amReq = amReq; + this.amReqs = amReqs; if (submissionContext.getPriority() != null) { this.applicationPriority = Priority .newInstance(submissionContext.getPriority().getPriority()); @@ -986,7 +986,7 @@ private void createNewAttempt(ApplicationAttemptId appAttemptId) { if (amBlacklistingEnabled && !submissionContext.getUnmanagedAM()) { currentAMBlacklistManager = new SimpleBlacklistManager( RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, - getAMResourceRequest()), + getAMResourceRequests()), blacklistDisableThreshold); } else { currentAMBlacklistManager = new DisabledBlacklistManager(); @@ -994,7 +994,7 @@ private void createNewAttempt(ApplicationAttemptId appAttemptId) { } RMAppAttempt attempt = new RMAppAttemptImpl(appAttemptId, 
rmContext, scheduler, masterService, - submissionContext, conf, amReq, this, currentAMBlacklistManager); + submissionContext, conf, amReqs, this, currentAMBlacklistManager); attempts.put(appAttemptId, attempt); currentAttempt = attempt; } @@ -1689,8 +1689,8 @@ public ReservationId getReservationId() { } @Override - public ResourceRequest getAMResourceRequest() { - return this.amReq; + public List getAMResourceRequests() { + return this.amReqs; } @Override @@ -1958,7 +1958,9 @@ public String getAppNodeLabelExpression() { public String getAmNodeLabelExpression() { String amNodeLabelExpression = null; if (!getApplicationSubmissionContext().getUnmanagedAM()) { - amNodeLabelExpression = getAMResourceRequest().getNodeLabelExpression(); + amNodeLabelExpression = + getAMResourceRequests() != null && !getAMResourceRequests().isEmpty() + ? getAMResourceRequests().get(0).getNodeLabelExpression() : null; amNodeLabelExpression = (amNodeLabelExpression == null) ? NodeLabel.NODE_LABEL_EXPRESSION_NOT_SET : amNodeLabelExpression; amNodeLabelExpression = (amNodeLabelExpression.trim().isEmpty()) diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 5c0f48e..19503e5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -192,7 +192,7 @@ private Object transitionTodo; private RMAppAttemptMetrics attemptMetrics = null; - private ResourceRequest 
amReq = null; + private List amReqs = null; private BlacklistManager blacklistedNodesForAM = null; private String amLaunchDiagnostics; @@ -485,16 +485,16 @@ public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, RMContext rmContext, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationSubmissionContext submissionContext, - Configuration conf, ResourceRequest amReq, RMApp rmApp) { + Configuration conf, List amReqs, RMApp rmApp) { this(appAttemptId, rmContext, scheduler, masterService, submissionContext, - conf, amReq, rmApp, new DisabledBlacklistManager()); + conf, amReqs, rmApp, new DisabledBlacklistManager()); } public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, RMContext rmContext, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationSubmissionContext submissionContext, - Configuration conf, ResourceRequest amReq, RMApp rmApp, + Configuration conf, List amReqs, RMApp rmApp, BlacklistManager amBlacklistManager) { this.conf = conf; this.applicationAttemptId = appAttemptId; @@ -514,7 +514,7 @@ public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, this.attemptMetrics = new RMAppAttemptMetrics(applicationAttemptId, rmContext); - this.amReq = amReq; + this.amReqs = amReqs; this.blacklistedNodesForAM = amBlacklistManager; final int diagnosticsLimitKC = getDiagnosticsLimitKCOrThrow(conf); @@ -1090,18 +1090,21 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, // will be passed to scheduler, and scheduler will deduct the number after // AM container allocated - // Currently, following fields are all hard code, + // Currently, following fields are all hard coded, // TODO: change these fields when we want to support - // priority/resource-name/relax-locality specification for AM containers - // allocation. 
- appAttempt.amReq.setNumContainers(1); - appAttempt.amReq.setPriority(AM_CONTAINER_PRIORITY); - appAttempt.amReq.setResourceName(ResourceRequest.ANY); - appAttempt.amReq.setRelaxLocality(true); - - appAttempt.getAMBlacklistManager().refreshNodeHostCount( + // priority or multiple containers for AM container allocation. + for (ResourceRequest amReq : appAttempt.amReqs) { + amReq.setNumContainers(1); + amReq.setPriority(AM_CONTAINER_PRIORITY); + } + + int numNodes = RMServerUtils.getApplicableNodeCountForAM(appAttempt.rmContext, - appAttempt.conf, appAttempt.amReq)); + appAttempt.conf, appAttempt.amReqs); + if (LOG.isDebugEnabled()) { + LOG.debug("Setting node count for blacklist to " + numNodes); + } + appAttempt.getAMBlacklistManager().refreshNodeHostCount(numNodes); ResourceBlacklistRequest amBlacklist = appAttempt.getAMBlacklistManager().getBlacklistUpdates(); @@ -1114,7 +1117,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, Allocation amContainerAllocation = appAttempt.scheduler.allocate( appAttempt.applicationAttemptId, - Collections.singletonList(appAttempt.amReq), + appAttempt.amReqs, EMPTY_CONTAINER_RELEASE_LIST, amBlacklist.getBlacklistAdditions(), amBlacklist.getBlacklistRemovals(), diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 213839d..99105cd 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -258,6 +258,16 @@ protected void initMaximumResourceCapability(Resource maximumAllocation) { nodeTracker.setConfiguredMaxAllocation(maximumAllocation); } + @Override + public int getNumClusterNodes() { + return nodeTracker.nodeCount(); + } + + @Override + public List getClusterNodeIdsByResourceName(String resourceName) { + return nodeTracker.getNodeIdsByResourceName(resourceName); + } + public SchedulerHealth getSchedulerHealth() { return this.schedulerHealth; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java index e487f69..010e645 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java @@ -268,6 +268,9 @@ private void updateMaxResources(SchedulerNode node, boolean add) { /** * Convenience method to filter nodes based on a condition. 
+ * + * @param nodeFilter A {@link NodeFilter} for filtering the nodes + * @return A list of filtered nodes */ public List getNodes(NodeFilter nodeFilter) { List nodeList = new ArrayList<>(); @@ -288,6 +291,37 @@ private void updateMaxResources(SchedulerNode node, boolean add) { return nodeList; } + public List getAllNodeIds() { + return getNodeIds(null); + } + + /** + * Convenience method to get the IDs of nodes that match a condition. + * + * @param nodeFilter A {@link NodeFilter} for filtering the nodes + * @return A list of the IDs of the filtered nodes + */ + public List getNodeIds(NodeFilter nodeFilter) { + List nodeList = new ArrayList<>(); + readLock.lock(); + try { + if (nodeFilter == null) { + for (N node : nodes.values()) { + nodeList.add(node.getNodeID()); + } + } else { + for (N node : nodes.values()) { + if (nodeFilter.accept(node)) { + nodeList.add(node.getNodeID()); + } + } + } + } finally { + readLock.unlock(); + } + return nodeList; + } + /** * Convenience method to sort nodes. * @@ -320,11 +354,38 @@ private void updateMaxResources(SchedulerNode node, boolean add) { resourceName != null && !resourceName.isEmpty()); List retNodes = new ArrayList<>(); if (ResourceRequest.ANY.equals(resourceName)) { - return getAllNodes(); + retNodes.addAll(getAllNodes()); } else if (nodeNameToNodeMap.containsKey(resourceName)) { retNodes.add(nodeNameToNodeMap.get(resourceName)); } else if (nodesPerRack.containsKey(resourceName)) { - return nodesPerRack.get(resourceName); + retNodes.addAll(nodesPerRack.get(resourceName)); + } else { + LOG.info( + "Could not find a node matching given resourceName " + resourceName); + } + return retNodes; + } + + /** + * Convenience method to return list of {@link NodeId} corresponding to + * resourceName passed in the {@link ResourceRequest}.
+ * + * @param resourceName Host/rack name of the resource, or + * {@link ResourceRequest#ANY} + * @return list of {@link NodeId} that match the resourceName + */ + public List getNodeIdsByResourceName(final String resourceName) { + Preconditions.checkArgument( + resourceName != null && !resourceName.isEmpty()); + List retNodes = new ArrayList<>(); + if (ResourceRequest.ANY.equals(resourceName)) { + retNodes.addAll(getAllNodeIds()); + } else if (nodeNameToNodeMap.containsKey(resourceName)) { + retNodes.add(nodeNameToNodeMap.get(resourceName).getNodeID()); + } else if (nodesPerRack.containsKey(resourceName)) { + for (N node : nodesPerRack.get(resourceName)) { + retNodes.add(node.getNodeID()); + } } else { LOG.info( "Could not find a node matching given resourceName " + resourceName); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 08e0603..532a238 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -124,6 +124,15 @@ public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, @Public @Stable public int getNumClusterNodes(); + + /** + * Get the {@link NodeId} available in the cluster by resource name. + * @param resourceName resource name + * @return the list of available {@link NodeId} by resource name.
+ */ + @LimitedPrivate("yarn") + @Evolving + List getClusterNodeIdsByResourceName(String resourceName); /** * The main api between the ApplicationMaster and the Scheduler. diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 20ea607..4746e35 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -266,11 +266,6 @@ public void setResourceCalculator(ResourceCalculator rc) { } @Override - public int getNumClusterNodes() { - return nodeTracker.nodeCount(); - } - - @Override public RMContext getRMContext() { return this.rmContext; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index fea29bb..5c0b718 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -141,18 +141,20 @@ public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId, Resource amResource; String partition; - if (rmApp == null || rmApp.getAMResourceRequest() == null) { + if (rmApp == null || rmApp.getAMResourceRequests() == null + || rmApp.getAMResourceRequests().isEmpty()) { // the rmApp may be undefined (the resource manager checks for this too) // and unmanaged applications do not provide an amResource request // in these cases, provide a default using the scheduler amResource = rmContext.getScheduler().getMinimumResourceCapability(); partition = CommonNodeLabelsManager.NO_LABEL; } else { - amResource = rmApp.getAMResourceRequest().getCapability(); + amResource = rmApp.getAMResourceRequests().get(0).getCapability(); partition = - (rmApp.getAMResourceRequest().getNodeLabelExpression() == null) + (rmApp.getAMResourceRequests().get(0) + .getNodeLabelExpression() == null) ? 
CommonNodeLabelsManager.NO_LABEL - : rmApp.getAMResourceRequest().getNodeLabelExpression(); + : rmApp.getAMResourceRequests().get(0).getNodeLabelExpression(); } setAppAMNodePartitionName(partition); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 4f3e4f9..972ccf7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1407,11 +1407,6 @@ public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, } @Override - public int getNumClusterNodes() { - return nodeTracker.nodeCount(); - } - - @Override public boolean checkAccess(UserGroupInformation callerUGI, QueueACL acl, String queueName) { try { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index a8d4f48..f535709 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -306,11 +306,6 @@ public synchronized Configuration getConf() { } @Override - public int getNumClusterNodes() { - return nodeTracker.nodeCount(); - } - - @Override public synchronized void setRMContext(RMContext rmContext) { this.rmContext = rmContext; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index 4e85b67..10e627a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -229,7 +229,7 @@ public AppInfo(ResourceManager rm, RMApp app, Boolean hasAccess, appNodeLabelExpression = app.getApplicationSubmissionContext().getNodeLabelExpression(); amNodeLabelExpression = (unmanagedApplication) ? 
null - : app.getAMResourceRequest().getNodeLabelExpression(); + : app.getAMResourceRequests().get(0).getNodeLabelExpression(); // Setting partition based resource usage of application ResourceScheduler scheduler = rm.getRMContext().getScheduler(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index f9f42ad..aca2fc5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -678,6 +679,17 @@ public RMApp submitApp(Credentials cred, ByteBuffer tokensConf) tokensConf); } + public RMApp submitApp(List amResourceRequests) + throws Exception { + return submitApp(amResourceRequests, "app1", + "user", null, false, null, + super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null, null, true, + false, false, null, 0, null, true, + amResourceRequests.get(0).getPriority(), + amResourceRequests.get(0).getNodeLabelExpression(), null, null); + } + public RMApp submitApp(Resource capability, String name, String user, Map acls, boolean unmanaged, String queue, int maxAppAttempts, Credentials ts, String appType, @@ -688,6 +700,30 @@ public RMApp submitApp(Resource capability, String name, String user, Map applicationTimeouts, ByteBuffer tokensConf) throws 
Exception { + priority = (priority == null) ? Priority.newInstance(0) : priority; + ResourceRequest amResourceRequest = ResourceRequest.newInstance( + priority, ResourceRequest.ANY, capability, 1); + if (amLabel != null && !amLabel.isEmpty()) { + amResourceRequest.setNodeLabelExpression(amLabel.trim()); + } + return submitApp(Collections.singletonList(amResourceRequest), name, user, + acls, unmanaged, queue, maxAppAttempts, ts, appType, waitForAccepted, + keepContainers, isAppIdProvided, applicationId, + attemptFailuresValidityInterval, logAggregationContext, + cancelTokensWhenComplete, priority, amLabel, applicationTimeouts, + tokensConf); + } + + public RMApp submitApp(List amResourceRequests, String name, + String user, Map acls, boolean unmanaged, + String queue, int maxAppAttempts, Credentials ts, String appType, + boolean waitForAccepted, boolean keepContainers, boolean isAppIdProvided, + ApplicationId applicationId, long attemptFailuresValidityInterval, + LogAggregationContext logAggregationContext, + boolean cancelTokensWhenComplete, Priority priority, String amLabel, + Map applicationTimeouts, + ByteBuffer tokensConf) + throws Exception { ApplicationId appId = isAppIdProvided ? applicationId : null; ApplicationClientProtocol client = getClientRMService(); if (! 
isAppIdProvided) { @@ -718,7 +754,6 @@ public RMApp submitApp(Resource capability, String name, String user, sub.setApplicationType(appType); ContainerLaunchContext clc = Records .newRecord(ContainerLaunchContext.class); - sub.setResource(capability); clc.setApplicationACLs(acls); if (ts != null && UserGroupInformation.isSecurityEnabled()) { DataOutputBuffer dob = new DataOutputBuffer(); @@ -733,12 +768,12 @@ public RMApp submitApp(Resource capability, String name, String user, sub.setLogAggregationContext(logAggregationContext); } sub.setCancelTokensWhenComplete(cancelTokensWhenComplete); - ResourceRequest amResourceRequest = ResourceRequest.newInstance( - Priority.newInstance(0), ResourceRequest.ANY, capability, 1); if (amLabel != null && !amLabel.isEmpty()) { - amResourceRequest.setNodeLabelExpression(amLabel.trim()); + for (ResourceRequest amResourceRequest : amResourceRequests) { + amResourceRequest.setNodeLabelExpression(amLabel.trim()); + } } - sub.setAMContainerResourceRequest(amResourceRequest); + sub.setAMContainerResourceRequests(amResourceRequests); req.setApplicationSubmissionContext(sub); UserGroupInformation fakeUser = UserGroupInformation.createUserForTesting(user, new String[] {"someGroup"}); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java index 892f8ba..eb69efa 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java @@ -31,6 +31,8 @@ import java.io.IOException; import 
java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.concurrent.ConcurrentMap; @@ -50,6 +52,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ExecutionType; +import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -57,11 +61,13 @@ import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp; @@ -72,6 +78,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; import 
org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; @@ -312,7 +319,7 @@ public void testQueueSubmitWithNoPermission() throws IOException { ResourceRequest resReg = ResourceRequest.newInstance(Priority.newInstance(0), ResourceRequest.ANY, Resource.newInstance(1024, 1), 1); - sub.setAMContainerResourceRequest(resReg); + sub.setAMContainerResourceRequests(Collections.singletonList(resReg)); req.setApplicationSubmissionContext(sub); sub.setAMContainerSpec(mock(ContainerLaunchContext.class)); try { @@ -522,8 +529,157 @@ protected void setupDispatcher(RMContext rmContext, Configuration conf) { Assert.assertEquals("app event type is wrong before", RMAppEventType.KILL, appEventType); } + @SuppressWarnings("deprecation") + @Test + public void testRMAppSubmitAMContainerResourceRequests() throws Exception { + asContext.setResource(Resources.createResource(1024)); + asContext.setAMContainerResourceRequest( + ResourceRequest.newInstance(Priority.newInstance(0), + ResourceRequest.ANY, Resources.createResource(1024), 1, true)); + List reqs = new ArrayList<>(); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(0), + ResourceRequest.ANY, Resources.createResource(1025), 1, false)); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(0), + "/rack", Resources.createResource(1025), 1, false)); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(0), + "/rack/node", Resources.createResource(1025), 1, true)); + asContext.setAMContainerResourceRequests(cloneResourceRequests(reqs)); + // getAMContainerResourceRequest uses the first entry of + // getAMContainerResourceRequests + Assert.assertEquals(reqs.get(0), asContext.getAMContainerResourceRequest()); + Assert.assertEquals(reqs, asContext.getAMContainerResourceRequests()); + RMApp app = testRMAppSubmit(); + for 
(ResourceRequest req : reqs) { + req.setNodeLabelExpression(RMNodeLabelsManager.NO_LABEL); + } + // setAMContainerResourceRequests has priority over + // setAMContainerResourceRequest and setResource + Assert.assertEquals(reqs, app.getAMResourceRequests()); + } + + @SuppressWarnings("deprecation") + @Test + public void testRMAppSubmitAMContainerResourceRequest() throws Exception { + asContext.setResource(Resources.createResource(1024)); + asContext.setAMContainerResourceRequests(null); + ResourceRequest req = + ResourceRequest.newInstance(Priority.newInstance(0), + ResourceRequest.ANY, Resources.createResource(1025), 1, true); + asContext.setAMContainerResourceRequest(cloneResourceRequest(req)); + // getAMContainerResourceRequests uses a singleton list of + // getAMContainerResourceRequest + Assert.assertEquals(req, asContext.getAMContainerResourceRequest()); + Assert.assertEquals(req, asContext.getAMContainerResourceRequests().get(0)); + Assert.assertEquals(1, asContext.getAMContainerResourceRequests().size()); + RMApp app = testRMAppSubmit(); + req.setNodeLabelExpression(RMNodeLabelsManager.NO_LABEL); + // setAMContainerResourceRequest has priority over setResource + Assert.assertEquals(Collections.singletonList(req), + app.getAMResourceRequests()); + } + @Test - public void testRMAppSubmit() throws Exception { + public void testRMAppSubmitResource() throws Exception { + asContext.setResource(Resources.createResource(1024)); + asContext.setAMContainerResourceRequests(null); + RMApp app = testRMAppSubmit(); + // setResource + Assert.assertEquals(Collections.singletonList( + ResourceRequest.newInstance(RMAppAttemptImpl.AM_CONTAINER_PRIORITY, + ResourceRequest.ANY, Resources.createResource(1024), 1, true, "")), + app.getAMResourceRequests()); + } + + @Test + public void testRMAppSubmitNoResourceRequests() throws Exception { + asContext.setResource(null); + asContext.setAMContainerResourceRequests(null); + try { + testRMAppSubmit(); + Assert.fail("Should have failed 
due to no ResourceRequest"); + } catch (InvalidResourceRequestException e) { + Assert.assertEquals( + "Invalid resource request, no resources requested", + e.getMessage()); + } + } + + @Test + public void testRMAppSubmitAMContainerResourceRequestsDisagree() + throws Exception { + asContext.setResource(null); + List reqs = new ArrayList<>(); + ResourceRequest anyReq = ResourceRequest.newInstance( + Priority.newInstance(1), + ResourceRequest.ANY, Resources.createResource(1024), 1, false, "label1", + ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED)); + reqs.add(anyReq); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(2), + "/rack", Resources.createResource(1025), 2, false, "", + ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC))); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(3), + "/rack/node", Resources.createResource(1026), 3, true, "", + ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC))); + asContext.setAMContainerResourceRequests(cloneResourceRequests(reqs)); + RMApp app = testRMAppSubmit(); + // It should force the requests to all agree on these points + for (ResourceRequest req : reqs) { + req.setCapability(anyReq.getCapability()); + req.setExecutionTypeRequest( + ExecutionTypeRequest.newInstance(ExecutionType.GUARANTEED)); + req.setNumContainers(1); + req.setPriority(Priority.newInstance(0)); + } + Assert.assertEquals(reqs, app.getAMResourceRequests()); + } + + @Test + public void testRMAppSubmitAMContainerResourceRequestsNoAny() + throws Exception { + asContext.setResource(null); + List reqs = new ArrayList<>(); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(1), + "/rack", Resources.createResource(1025), 1, false)); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(1), + "/rack/node", Resources.createResource(1025), 1, true)); + asContext.setAMContainerResourceRequests(cloneResourceRequests(reqs)); + // getAMContainerResourceRequest uses the first entry of + // 
getAMContainerResourceRequests + Assert.assertEquals(reqs, asContext.getAMContainerResourceRequests()); + try { + testRMAppSubmit(); + Assert.fail("Should have failed due to missing ANY ResourceRequest"); + } catch (InvalidResourceRequestException e) { + Assert.assertEquals( + "Invalid resource request, no resource request specified with *", + e.getMessage()); + } + } + + @Test + public void testRMAppSubmitAMContainerResourceRequestsTwoManyAny() + throws Exception { + asContext.setResource(null); + List reqs = new ArrayList<>(); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(1), + ResourceRequest.ANY, Resources.createResource(1025), 1, false)); + reqs.add(ResourceRequest.newInstance(Priority.newInstance(1), + ResourceRequest.ANY, Resources.createResource(1025), 1, false)); + asContext.setAMContainerResourceRequests(cloneResourceRequests(reqs)); + // getAMContainerResourceRequest uses the first entry of + // getAMContainerResourceRequests + Assert.assertEquals(reqs, asContext.getAMContainerResourceRequests()); + try { + testRMAppSubmit(); + Assert.fail("Should have failed due to too many ANY ResourceRequests"); + } catch (InvalidResourceRequestException e) { + Assert.assertEquals( + "Invalid resource request, only one resource request with * is " + + "allowed", e.getMessage()); + } + } + + private RMApp testRMAppSubmit() throws Exception { appMonitor.submitApplication(asContext, "test"); RMApp app = rmContext.getRMApps().get(appId); Assert.assertNotNull("app is null", app); @@ -534,12 +690,14 @@ public void testRMAppSubmit() throws Exception { // wait for event to be processed int timeoutSecs = 0; - while ((getAppEventType() == RMAppEventType.KILL) && + while ((getAppEventType() == RMAppEventType.KILL) && timeoutSecs++ < 20) { Thread.sleep(1000); } Assert.assertEquals("app event type sent is wrong", RMAppEventType.START, getAppEventType()); + + return app; } @Test @@ -737,6 +895,15 @@ private static ResourceScheduler mockResourceScheduler() { 
ResourceCalculator rs = mock(ResourceCalculator.class); when(scheduler.getResourceCalculator()).thenReturn(rs); + when(scheduler.getNormalizedResource(any())) + .thenAnswer(new Answer() { + @Override + public Resource answer(InvocationOnMock invocationOnMock) + throws Throwable { + return (Resource) invocationOnMock.getArguments()[0]; + } + }); + return scheduler; } @@ -753,4 +920,26 @@ private static Resource mockResource() { YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); } + private static ResourceRequest cloneResourceRequest(ResourceRequest req) { + return ResourceRequest.newInstance( + Priority.newInstance(req.getPriority().getPriority()), + new String(req.getResourceName()), + Resource.newInstance(req.getCapability().getMemorySize(), + req.getCapability().getVirtualCores()), + req.getNumContainers(), + req.getRelaxLocality(), + req.getNodeLabelExpression() != null + ? new String(req.getNodeLabelExpression()) : null, + ExecutionTypeRequest.newInstance( + req.getExecutionTypeRequest().getExecutionType())); + } + + private static List cloneResourceRequests( + List reqs) { + List cloneReqs = new ArrayList<>(); + for (ResourceRequest req : reqs) { + cloneReqs.add(cloneResourceRequest(req)); + } + return cloneReqs; + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 7a67aa8..f0e60a2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -38,6 
+38,7 @@ import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; @@ -1307,9 +1308,9 @@ private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler, spy(new RMAppImpl(applicationId3, rmContext, config, null, null, queueName, asContext, yarnScheduler, null, System.currentTimeMillis(), "YARN", null, - BuilderUtils.newResourceRequest( + Collections.singletonList(BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - Resource.newInstance(1024, 1), 1)){ + Resource.newInstance(1024, 1), 1))){ @Override public ApplicationReport createAndGetApplicationReport( String clientUserName, boolean allowAccess) { @@ -1323,7 +1324,8 @@ public ApplicationReport createAndGetApplicationReport( return report; } }); - app.getAMResourceRequest().setNodeLabelExpression(amNodeLabelExpression); + app.getAMResourceRequests().get(0) + .setNodeLabelExpression(amNodeLabelExpression); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance( ApplicationId.newInstance(123456, 1), 1); RMAppAttemptImpl rmAppAttemptImpl = spy(new RMAppAttemptImpl(attemptId, diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java index c80a799..b4adf48 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestNodeBlacklistingOnAMFailures.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -28,6 +29,9 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; @@ -157,6 +161,186 @@ public void testNodeBlacklistingOnAMFailure() throws Exception { } @Test(timeout = 100000) + public void testNodeBlacklistingOnAMFailureStrictNodeLocality() + throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, + true); + + DrainDispatcher dispatcher = new DrainDispatcher(); + MockRM rm = startRM(conf, dispatcher); + CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler(); + + // Register 5 nodes, so that we can blacklist atleast one if AM container + // is failed. As per calculation it will be like, 5nodes * 0.2 (default)=1. 
+ MockNM nm1 = + new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService()); + nm1.registerNode(); + + MockNM nm2 = + new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService()); + nm2.registerNode(); + + MockNM nm3 = + new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService()); + nm3.registerNode(); + + MockNM nm4 = + new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService()); + nm4.registerNode(); + + MockNM nm5 = + new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService()); + nm5.registerNode(); + + // Specify a strict locality on nm2 + List reqs = new ArrayList<>(); + ResourceRequest nodeReq = ResourceRequest.newInstance( + Priority.newInstance(0), nm2.getNodeId().getHost(), + Resource.newInstance(200, 1), 1, true); + ResourceRequest rackReq = ResourceRequest.newInstance( + Priority.newInstance(0), "/default-rack", + Resource.newInstance(200, 1), 1, false); + ResourceRequest anyReq = ResourceRequest.newInstance( + Priority.newInstance(0), ResourceRequest.ANY, + Resource.newInstance(200, 1), 1, false); + reqs.add(anyReq); + reqs.add(rackReq); + reqs.add(nodeReq); + RMApp app = rm.submitApp(reqs); + + MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm2); + ContainerId amContainerId = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + RMContainer rmContainer = scheduler.getRMContainer(amContainerId); + NodeId nodeWhereAMRan = rmContainer.getAllocatedNode(); + Assert.assertEquals(nm2.getNodeId(), nodeWhereAMRan); + + // Set the exist status to INVALID so that we can verify that the system + // automatically blacklisting the node + makeAMContainerExit(rm, amContainerId, nm2, ContainerExitStatus.INVALID); + + // restart the am + RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm); + System.out.println("New AppAttempt launched " + attempt.getAppAttemptId()); + + nm2.nodeHeartbeat(true); + dispatcher.await(); + + // Now the AM container should be allocated + MockRM.waitForState(attempt, 
RMAppAttemptState.ALLOCATED, 20000); + + MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId()); + rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED); + amContainerId = + ContainerId.newContainerId(am2.getApplicationAttemptId(), 1); + rmContainer = scheduler.getRMContainer(amContainerId); + nodeWhereAMRan = rmContainer.getAllocatedNode(); + + // The second AM should be on the same node because the strict locality + // made the eligible nodes only 1, so the blacklisting threshold kicked in + System.out.println("AM ran on " + nodeWhereAMRan); + Assert.assertEquals(nm2.getNodeId(), nodeWhereAMRan); + + am2.registerAppAttempt(); + rm.waitForState(app.getApplicationId(), RMAppState.RUNNING); + } + + @Test(timeout = 100000) + public void testNodeBlacklistingOnAMFailureRelaxedNodeLocality() + throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + conf.setBoolean(YarnConfiguration.AM_SCHEDULING_NODE_BLACKLISTING_ENABLED, + true); + + DrainDispatcher dispatcher = new DrainDispatcher(); + MockRM rm = startRM(conf, dispatcher); + CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler(); + + // Register 5 nodes, so that we can blacklist atleast one if AM container + // is failed. As per calculation it will be like, 5nodes * 0.2 (default)=1. 
+ MockNM nm1 = + new MockNM("127.0.0.1:1234", 8000, rm.getResourceTrackerService()); + nm1.registerNode(); + + MockNM nm2 = + new MockNM("127.0.0.2:2345", 8000, rm.getResourceTrackerService()); + nm2.registerNode(); + + MockNM nm3 = + new MockNM("127.0.0.3:2345", 8000, rm.getResourceTrackerService()); + nm3.registerNode(); + + MockNM nm4 = + new MockNM("127.0.0.4:2345", 8000, rm.getResourceTrackerService()); + nm4.registerNode(); + + MockNM nm5 = + new MockNM("127.0.0.5:2345", 8000, rm.getResourceTrackerService()); + nm5.registerNode(); + + // Specify a relaxed locality on nm2 + List reqs = new ArrayList<>(); + ResourceRequest nodeReq = ResourceRequest.newInstance( + Priority.newInstance(0), nm2.getNodeId().getHost(), + Resource.newInstance(200, 1), 1, true); + ResourceRequest rackReq = ResourceRequest.newInstance( + Priority.newInstance(0), "/default-rack", + Resource.newInstance(200, 1), 1, true); + ResourceRequest anyReq = ResourceRequest.newInstance( + Priority.newInstance(0), ResourceRequest.ANY, + Resource.newInstance(200, 1), 1, true); + reqs.add(anyReq); + reqs.add(rackReq); + reqs.add(nodeReq); + RMApp app = rm.submitApp(reqs); + + MockAM am1 = MockRM.launchAndRegisterAM(app, rm, nm2); + ContainerId amContainerId = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + RMContainer rmContainer = scheduler.getRMContainer(amContainerId); + NodeId nodeWhereAMRan = rmContainer.getAllocatedNode(); + Assert.assertEquals(nm2.getNodeId(), nodeWhereAMRan); + + // Set the exist status to INVALID so that we can verify that the system + // automatically blacklisting the node + makeAMContainerExit(rm, amContainerId, nm2, ContainerExitStatus.INVALID); + + // restart the am + RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, rm); + System.out.println("New AppAttempt launched " + attempt.getAppAttemptId()); + + nm2.nodeHeartbeat(true); + nm1.nodeHeartbeat(true); + nm3.nodeHeartbeat(true); + nm4.nodeHeartbeat(true); + nm5.nodeHeartbeat(true); + 
dispatcher.await(); + + // Now the AM container should be allocated + MockRM.waitForState(attempt, RMAppAttemptState.ALLOCATED, 20000); + + MockAM am2 = rm.sendAMLaunched(attempt.getAppAttemptId()); + rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED); + amContainerId = + ContainerId.newContainerId(am2.getApplicationAttemptId(), 1); + rmContainer = scheduler.getRMContainer(amContainerId); + nodeWhereAMRan = rmContainer.getAllocatedNode(); + + // The second AM should be on a different node because the relaxed locality + // made the app schedulable on other nodes and nm2 is blacklisted + System.out.println("AM ran on " + nodeWhereAMRan); + Assert.assertNotEquals(nm2.getNodeId(), nodeWhereAMRan); + + am2.registerAppAttempt(); + rm.waitForState(app.getApplicationId(), RMAppState.RUNNING); + } + + @Test(timeout = 100000) public void testNoBlacklistingForNonSystemErrors() throws Exception { YarnConfiguration conf = new YarnConfiguration(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMServerUtils.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMServerUtils.java new file mode 100644 index 0000000..ab997cc --- /dev/null +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMServerUtils.java @@ -0,0 +1,289 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class TestRMServerUtils { + @Test + public void testGetApplicableNodeCountForAMLocality() throws Exception { + List rack1Nodes = new ArrayList<>(); + for (int i = 0; i < 29; i++) { + rack1Nodes.add(NodeId.newInstance("host" + i, 1234)); + } + NodeId node1 = NodeId.newInstance("node1", 1234); + NodeId node2 = NodeId.newInstance("node2", 1234); + rack1Nodes.add(node2); + + YarnConfiguration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, false); + ResourceScheduler scheduler = Mockito.mock(ResourceScheduler.class); + Mockito.when(scheduler.getNumClusterNodes()).thenReturn(100); + Mockito.when(scheduler.getClusterNodeIdsByResourceName("/rack1")) + .thenReturn(rack1Nodes); + Mockito.when(scheduler.getClusterNodeIdsByResourceName("node1")) + .thenReturn(Collections.singletonList(node1)); + Mockito.when(scheduler.getClusterNodeIdsByResourceName("node2")) + 
.thenReturn(Collections.singletonList(node2)); + RMContext rmContext = Mockito.mock(RMContext.class); + Mockito.when(rmContext.getScheduler()).thenReturn(scheduler); + + ResourceRequest anyReq = createResourceRequest(ResourceRequest.ANY, + true, null); + List reqs = new ArrayList<>(); + reqs.add(anyReq); + Assert.assertEquals(100, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + ResourceRequest rackReq = createResourceRequest("/rack1", true, null); + reqs.add(rackReq); + Assert.assertEquals(30, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + anyReq.setRelaxLocality(false); + Assert.assertEquals(30, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(false); + Assert.assertEquals(100, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + ResourceRequest node1Req = createResourceRequest("node1", false, null); + reqs.add(node1Req); + Assert.assertEquals(100, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node1Req.setRelaxLocality(true); + Assert.assertEquals(1, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(true); + Assert.assertEquals(31, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + ResourceRequest node2Req = createResourceRequest("node2", false, null); + reqs.add(node2Req); + Assert.assertEquals(31, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node2Req.setRelaxLocality(true); + Assert.assertEquals(31, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(false); + Assert.assertEquals(2, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node1Req.setRelaxLocality(false); + Assert.assertEquals(1, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node2Req.setRelaxLocality(false); + Assert.assertEquals(100, + 
RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + } + + @Test + public void testGetApplicableNodeCountForAMLabels() throws Exception { + Set noLabelNodes = new HashSet<>(); + for (int i = 0; i < 80; i++) { + noLabelNodes.add(NodeId.newInstance("host" + i, 1234)); + } + Set label1Nodes = new HashSet<>(); + for (int i = 80; i < 90; i++) { + label1Nodes.add(NodeId.newInstance("host" + i, 1234)); + } + + YarnConfiguration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + ResourceScheduler scheduler = Mockito.mock(ResourceScheduler.class); + Mockito.when(scheduler.getNumClusterNodes()).thenReturn(100); + RMContext rmContext = Mockito.mock(RMContext.class); + Mockito.when(rmContext.getScheduler()).thenReturn(scheduler); + RMNodeLabelsManager labMan = Mockito.mock(RMNodeLabelsManager.class); + Mockito.when(labMan.getActiveNMPerLabel(RMNodeLabelsManager.NO_LABEL)) + .thenReturn(noLabelNodes); + Mockito.when(labMan.getActiveNMPerLabel("label1")).thenReturn(label1Nodes); + Mockito.when(rmContext.getNodeLabelManager()).thenReturn(labMan); + + ResourceRequest anyReq = createResourceRequest(ResourceRequest.ANY, + true, null); + List reqs = new ArrayList<>(); + reqs.add(anyReq); + Assert.assertEquals(80, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + anyReq.setNodeLabelExpression("label1"); + Assert.assertEquals(10, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + } + + @Test + public void testGetApplicableNodeCountForAMLocalityAndLabels() + throws Exception { + List rack1Nodes = new ArrayList<>(); + for (int i = 0; i < 29; i++) { + rack1Nodes.add(NodeId.newInstance("host" + i, 1234)); + } + NodeId node1 = NodeId.newInstance("node1", 1234); + NodeId node2 = NodeId.newInstance("node2", 1234); + rack1Nodes.add(node2); + Set noLabelNodes = new HashSet<>(); + for (int i = 0; i < 19; i++) { + noLabelNodes.add(rack1Nodes.get(i)); + } + noLabelNodes.add(node2); + for (int i 
= 29; i < 89; i++) { + noLabelNodes.add(NodeId.newInstance("host" + i, 1234)); + } + Set label1Nodes = new HashSet<>(); + label1Nodes.add(node1); + for (int i = 89; i < 93; i++) { + label1Nodes.add(NodeId.newInstance("host" + i, 1234)); + } + for (int i = 19; i < 29; i++) { + label1Nodes.add(rack1Nodes.get(i)); + } + + YarnConfiguration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + ResourceScheduler scheduler = Mockito.mock(ResourceScheduler.class); + Mockito.when(scheduler.getNumClusterNodes()).thenReturn(100); + Mockito.when(scheduler.getClusterNodeIdsByResourceName("/rack1")) + .thenReturn(rack1Nodes); + Mockito.when(scheduler.getClusterNodeIdsByResourceName("node1")) + .thenReturn(Collections.singletonList(node1)); + Mockito.when(scheduler.getClusterNodeIdsByResourceName("node2")) + .thenReturn(Collections.singletonList(node2)); + RMContext rmContext = Mockito.mock(RMContext.class); + Mockito.when(rmContext.getScheduler()).thenReturn(scheduler); + RMNodeLabelsManager labMan = Mockito.mock(RMNodeLabelsManager.class); + Mockito.when(labMan.getActiveNMPerLabel(RMNodeLabelsManager.NO_LABEL)) + .thenReturn(noLabelNodes); + Mockito.when(labMan.getActiveNMPerLabel("label1")).thenReturn(label1Nodes); + Mockito.when(rmContext.getNodeLabelManager()).thenReturn(labMan); + + ResourceRequest anyReq = createResourceRequest(ResourceRequest.ANY, + true, null); + List reqs = new ArrayList<>(); + reqs.add(anyReq); + Assert.assertEquals(80, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + ResourceRequest rackReq = createResourceRequest("/rack1", true, null); + reqs.add(rackReq); + Assert.assertEquals(20, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + anyReq.setRelaxLocality(false); + Assert.assertEquals(20, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(false); + Assert.assertEquals(80, + 
RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + ResourceRequest node1Req = createResourceRequest("node1", false, null); + reqs.add(node1Req); + Assert.assertEquals(80, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node1Req.setRelaxLocality(true); + Assert.assertEquals(0, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(true); + Assert.assertEquals(20, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + ResourceRequest node2Req = createResourceRequest("node2", false, null); + reqs.add(node2Req); + Assert.assertEquals(20, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node2Req.setRelaxLocality(true); + Assert.assertEquals(20, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(false); + Assert.assertEquals(1, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node1Req.setRelaxLocality(false); + Assert.assertEquals(1, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node2Req.setRelaxLocality(false); + Assert.assertEquals(80, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + anyReq.setNodeLabelExpression("label1"); + rackReq.setNodeLabelExpression("label1"); + node1Req.setNodeLabelExpression("label1"); + node2Req.setNodeLabelExpression("label1"); + anyReq.setRelaxLocality(true); + reqs = new ArrayList<>(); + reqs.add(anyReq); + Assert.assertEquals(15, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + rackReq.setRelaxLocality(true); + reqs.add(rackReq); + Assert.assertEquals(10, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + anyReq.setRelaxLocality(false); + Assert.assertEquals(10, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(false); + Assert.assertEquals(15, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + 
+ node1Req.setRelaxLocality(false); + reqs.add(node1Req); + Assert.assertEquals(15, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node1Req.setRelaxLocality(true); + Assert.assertEquals(1, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(true); + Assert.assertEquals(11, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + + node2Req.setRelaxLocality(false); + reqs.add(node2Req); + Assert.assertEquals(11, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node2Req.setRelaxLocality(true); + Assert.assertEquals(11, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + rackReq.setRelaxLocality(false); + Assert.assertEquals(1, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node1Req.setRelaxLocality(false); + Assert.assertEquals(0, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + node2Req.setRelaxLocality(false); + Assert.assertEquals(15, + RMServerUtils.getApplicableNodeCountForAM(rmContext, conf, reqs)); + } + + private ResourceRequest createResourceRequest(String resource, + boolean relaxLocality, String nodeLabel) { + return ResourceRequest.newInstance(Priority.newInstance(0), + resource, Resource.newInstance(1, 1), 1, relaxLocality, nodeLabel); + } +} diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index 9be52c6..5246eb7 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -57,7 +57,7 @@ public abstract class MockAsm extends MockApps { public static class ApplicationBase implements RMApp { - ResourceRequest amReq; + List amReqs; @Override public String getUser() { throw new UnsupportedOperationException("Not supported yet."); @@ -204,8 +204,8 @@ public ReservationId getReservationId() { } @Override - public ResourceRequest getAMResourceRequest() { - return this.amReq; + public List getAMResourceRequests() { + return this.amReqs; } @Override diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java index 55e93c1..7005bca 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java @@ -526,7 +526,8 @@ private static RMApp createRMApp(ApplicationId appId) { when(app.getAppNodeLabelExpression()).thenCallRealMethod(); ResourceRequest amReq = mock(ResourceRequest.class); when(amReq.getNodeLabelExpression()).thenReturn("high-mem"); - when(app.getAMResourceRequest()).thenReturn(amReq); + when(app.getAMResourceRequests()) + .thenReturn(Collections.singletonList(amReq)); when(app.getAmNodeLabelExpression()).thenCallRealMethod(); when(app.getApplicationPriority()).thenReturn(Priority.newInstance(10)); 
when(app.getCallerContext()) diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index 118b6bc..9290ff8 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -21,6 +21,7 @@ import java.util.Collection; import java.util.Collections; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Set; @@ -62,14 +63,14 @@ StringBuilder diagnostics = new StringBuilder(); RMAppAttempt attempt; int maxAppAttempts = 1; - ResourceRequest amReq; + List amReqs; public MockRMApp(int newid, long time, RMAppState newState) { finish = time; id = MockApps.newAppID(newid); state = newState; - amReq = ResourceRequest.newInstance(Priority.UNDEFINED, "0.0.0.0", - Resource.newInstance(0, 0), 1); + amReqs = Collections.singletonList(ResourceRequest.newInstance( + Priority.UNDEFINED, "0.0.0.0", Resource.newInstance(0, 0), 1)); } public MockRMApp(int newid, long time, RMAppState newState, String userName) { @@ -276,8 +277,8 @@ public ReservationId getReservationId() { } @Override - public ResourceRequest getAMResourceRequest() { - return this.amReq; + public List getAMResourceRequests() { + return this.amReqs; } @Override diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 4884851..5aa7af9 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -30,8 +30,10 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.Map; import org.apache.commons.logging.Log; @@ -271,7 +273,8 @@ protected RMApp createNewTestApp(ApplicationSubmissionContext submissionContext) submissionContext.setAMContainerSpec(mock(ContainerLaunchContext.class)); RMApp application = new RMAppImpl(applicationId, rmContext, conf, name, user, queue, submissionContext, scheduler, masterService, - System.currentTimeMillis(), "YARN", null, mock(ResourceRequest.class)); + System.currentTimeMillis(), "YARN", null, + new ArrayList()); testAppStartState(applicationId, user, name, queue, application); this.rmContext.getRMApps().putIfAbsent(application.getApplicationId(), @@ -1024,9 +1027,9 @@ public void testRecoverApplication(ApplicationStateData appState, submissionContext.getQueue(), submissionContext, scheduler, null, appState.getSubmitTime(), submissionContext.getApplicationType(), submissionContext.getApplicationTags(), - BuilderUtils.newResourceRequest( + Collections.singletonList(BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1)); + submissionContext.getResource(), 1))); Assert.assertEquals(RMAppState.NEW, application.getState()); RMAppEvent recoverEvent = diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index ced5bd9..9a4b6dc 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -328,9 +328,9 @@ public void setUp() throws Exception { applicationAttempt = new RMAppAttemptImpl(applicationAttemptId, spyRMContext, scheduler, masterService, submissionContext, new Configuration(), - BuilderUtils.newResourceRequest( + Collections.singletonList(BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1), application); + submissionContext.getResource(), 1)), application); when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt); when(application.getApplicationId()).thenReturn(applicationId); @@ -1108,9 +1108,9 @@ public void testLaunchedFailWhileAHSEnabled() { new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler,masterService, submissionContext, myConf, - BuilderUtils.newResourceRequest( + Collections.singletonList(BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1), application); + submissionContext.getResource(), 1)), application); //submit, schedule and allocate app attempt myApplicationAttempt.handle( @@ -1584,9 +1584,9 @@ public void 
testContainersCleanupForLastAttempt() { applicationAttempt = new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler, masterService, submissionContext, new Configuration(), - BuilderUtils.newResourceRequest( + Collections.singletonList(BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getResource(), 1), application); + submissionContext.getResource(), 1)), application); when(submissionContext.getKeepContainersAcrossApplicationAttempts()) .thenReturn(true); when(submissionContext.getMaxAppAttempts()).thenReturn(1); @@ -1645,9 +1645,10 @@ public Allocation answer(InvocationOnMock invocation) applicationAttempt = new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler, masterService, submissionContext, - new Configuration(), ResourceRequest.newInstance( - Priority.UNDEFINED, "host1", Resource.newInstance(3333, 1), 3, - false, "label-expression"), application); + new Configuration(), Collections.singletonList( + ResourceRequest.newInstance(Priority.UNDEFINED, "host1", + Resource.newInstance(3333, 1), 3, + false, "label-expression")), application); new RMAppAttemptImpl.ScheduleTransition().transition( (RMAppAttemptImpl) applicationAttempt, null); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index bb0a123..8aca235 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -613,7 +614,8 @@ public void testHeadroom() throws Exception { ResourceRequest amResourceRequest = mock(ResourceRequest.class); Resource amResource = Resources.createResource(0, 0); when(amResourceRequest.getCapability()).thenReturn(amResource); - when(rmApp.getAMResourceRequest()).thenReturn(amResourceRequest); + when(rmApp.getAMResourceRequests()).thenReturn( + Collections.singletonList(amResourceRequest)); Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId)Matchers.any()); when(spyRMContext.getRMApps()).thenReturn(spyApps); RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java index b70a359..0aac2ef 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimitsByPartition.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import 
java.util.Map; @@ -639,7 +640,8 @@ public void testHeadroom() throws Exception { ResourceRequest amResourceRequest = mock(ResourceRequest.class); Resource amResource = Resources.createResource(0, 0); when(amResourceRequest.getCapability()).thenReturn(amResource); - when(rmApp.getAMResourceRequest()).thenReturn(amResourceRequest); + when(rmApp.getAMResourceRequests()).thenReturn( + Collections.singletonList(amResourceRequest)); Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId) Matchers.any()); when(spyRMContext.getRMApps()).thenReturn(spyApps); RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 2b60ecf..4e7baf2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -3005,7 +3005,7 @@ public void testAMUsedResource() throws Exception { RMApp rmApp = rm.submitApp(amMemory, "app-1", "user_0", null, queueName); assertEquals("RMApp does not containes minimum allocation", - minAllocResource, rmApp.getAMResourceRequest().getCapability()); + minAllocResource, rmApp.getAMResourceRequests().get(0).getCapability()); ResourceScheduler scheduler = rm.getRMContext().getScheduler(); LeafQueue queueA = diff --git 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 3fbbae3..1162b9f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -151,7 +151,8 @@ private void setUpInternal(ResourceCalculator rC) throws Exception { amResourceRequest = mock(ResourceRequest.class); when(amResourceRequest.getCapability()).thenReturn( Resources.createResource(0, 0)); - when(rmApp.getAMResourceRequest()).thenReturn(amResourceRequest); + when(rmApp.getAMResourceRequests()).thenReturn( + Collections.singletonList(amResourceRequest)); Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId)Matchers.any()); when(spyRMContext.getRMApps()).thenReturn(spyApps); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 31dd7fe..c6a55a3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -3208,6 +3208,84 @@ public void testCancelStrictLocality() throws IOException { assertEquals(1, app.getLiveContainers().size()); } + @Test + public void testAMStrictLocalityRack() throws IOException { + testAMStrictLocality(false, false); + } + + @Test + public void testAMStrictLocalityNode() throws IOException { + testAMStrictLocality(true, false); + } + + @Test + public void testAMStrictLocalityRackInvalid() throws IOException { + testAMStrictLocality(false, true); + } + + @Test + public void testAMStrictLocalityNodeInvalid() throws IOException { + testAMStrictLocality(true, true); + } + + private void testAMStrictLocality(boolean node, boolean invalid) + throws IOException { + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + RMNode node2 = MockNodes.newNodeInfo(2, Resources.createResource(1024), 2, + "127.0.0.2"); + NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2); + scheduler.handle(nodeEvent2); + + List reqs = new ArrayList<>(); + ResourceRequest nodeRequest = createResourceRequest(1024, + node2.getHostName(), 1, 1, true); + if (node && invalid) { + nodeRequest.setResourceName("invalid"); + } + ResourceRequest rackRequest = createResourceRequest(1024, + node2.getRackName(), 1, 1, !node); + if (!node && invalid) { + rackRequest.setResourceName("invalid"); + } + ResourceRequest anyRequest = createResourceRequest(1024, + ResourceRequest.ANY, 1, 1, false); + reqs.add(anyRequest); + reqs.add(rackRequest); + if (node) { + reqs.add(nodeRequest); + } + + ApplicationAttemptId attId1 = + 
createSchedulingRequest("queue1", "user1", reqs); + + scheduler.update(); + + NodeUpdateSchedulerEvent node2UpdateEvent = + new NodeUpdateSchedulerEvent(node2); + + FSAppAttempt app = scheduler.getSchedulerApp(attId1); + + // node2 should get the container + scheduler.handle(node2UpdateEvent); + if (invalid) { + assertEquals(0, app.getLiveContainers().size()); + assertEquals(0, scheduler.getNode(node2.getNodeID()).getNumContainers()); + assertEquals(0, scheduler.getNode(node1.getNodeID()).getNumContainers()); + } else { + assertEquals(1, app.getLiveContainers().size()); + assertEquals(1, scheduler.getNode(node2.getNodeID()).getNumContainers()); + assertEquals(0, scheduler.getNode(node1.getNodeID()).getNumContainers()); + } + } + /** * Strict locality requests shouldn't reserve resources on another node. */ diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index 30f25e9..fb9e8ed 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1213,8 +1213,8 @@ public void testUnmarshalAppInfo() throws JSONException, Exception { assertEquals(app1.getApplicationId().toString(), appInfo.getAppId()); assertEquals(app1.getName(), appInfo.getName()); assertEquals(app1.createApplicationState(), appInfo.getState()); - assertEquals(app1.getAMResourceRequest().getCapability().getMemorySize(), - appInfo.getAllocatedMB()); + 
assertEquals(app1.getAMResourceRequests().get(0).getCapability() + .getMemorySize(), appInfo.getAllocatedMB()); rm.stop(); } @@ -1427,7 +1427,7 @@ public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException, expectedNumberOfElements++; appNodeLabelExpression = info.getString("appNodeLabelExpression"); } - if (app.getAMResourceRequest().getNodeLabelExpression() != null) { + if (app.getAMResourceRequests().get(0).getNodeLabelExpression() != null) { expectedNumberOfElements++; amNodeLabelExpression = info.getString("amNodeLabelExpression"); } @@ -1534,7 +1534,7 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user, app.getApplicationSubmissionContext().getNodeLabelExpression(), appNodeLabelExpression); assertEquals("unmanagedApplication doesn't match", - app.getAMResourceRequest().getNodeLabelExpression(), + app.getAMResourceRequests().get(0).getNodeLabelExpression(), amNodeLabelExpression); assertEquals("amRPCAddress", AppInfo.getAmRPCAddressFromRMAppAttempt(app.getCurrentAppAttempt()), @@ -1561,7 +1561,7 @@ public void verifyResourceRequestsGeneric(RMApp app, String nodeLabelExpression, int numContainers, boolean relaxLocality, int priority, String resourceName, long memory, long vCores, String executionType, boolean enforceExecutionType) { - ResourceRequest request = app.getAMResourceRequest(); + ResourceRequest request = app.getAMResourceRequests().get(0); assertEquals("nodeLabelExpression doesn't match", request.getNodeLabelExpression(), nodeLabelExpression); assertEquals("numContainers doesn't match", request.getNumContainers(),