diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java index e148c32..3a8dc23 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java @@ -98,6 +98,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NMToken; @@ -1576,7 +1577,7 @@ public MyFifoScheduler(RMContext rmContext) { public synchronized Allocation allocate( ApplicationAttemptId applicationAttemptId, List ask, List release, - List blacklistAdditions, List blacklistRemovals) { + List blacklistAdditions, List blacklistRemovals, List increaseRequests, List decreaseRequests) { List askCopy = new ArrayList(); for (ResourceRequest req : ask) { ResourceRequest reqCopy = ResourceRequest.newInstance(req @@ -1591,7 +1592,7 @@ public synchronized Allocation allocate( lastBlacklistRemovals = blacklistRemovals; return super.allocate( applicationAttemptId, askCopy, release, - blacklistAdditions, blacklistRemovals); + blacklistAdditions, blacklistRemovals, increaseRequests, decreaseRequests); } } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java 
b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 2d2c3e0..4811bdf 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -174,6 +175,19 @@ public String getNodeManagerVersion() { public Set getNodeLabels() { return RMNodeLabelsManager.EMPTY_STRING_SET; } + + @Override + public void updateNodeHeartbeatResposneForContainersDecreasing( + NodeHeartbeatResponse response) { + // TODO Auto-generated method stub + + } + + @Override + public List pullNewlyIncreasedContainers() { + // TODO Auto-generated method stub + return null; + } } public static RMNode newNodeInfo(String rackName, String hostName, diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index ecc4734..a64a3e2 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import 
org.apache.hadoop.yarn.api.records.NodeState; @@ -163,4 +164,16 @@ public String getNodeManagerVersion() { public Set getNodeLabels() { return RMNodeLabelsManager.EMPTY_STRING_SET; } + + @Override + public void updateNodeHeartbeatResposneForContainersDecreasing( + NodeHeartbeatResponse response) { + // TODO Auto-generated method stub + } + + @Override + public List pullNewlyIncreasedContainers() { + // TODO Auto-generated method stub + return null; + } } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index 14e2645..5828c8f6 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; 
@@ -204,13 +206,13 @@ public void run() { public Allocation allocate(ApplicationAttemptId attemptId, List resourceRequests, List containerIds, - List strings, List strings2) { + List strings, List strings2, List increaseRequests, List decreaseRequests) { if (metricsON) { final Timer.Context context = schedulerAllocateTimer.time(); Allocation allocation = null; try { allocation = scheduler.allocate(attemptId, resourceRequests, - containerIds, strings, strings2); + containerIds, strings, strings2, null, null); return allocation; } finally { context.stop(); @@ -224,7 +226,7 @@ public Allocation allocate(ApplicationAttemptId attemptId, } } else { return scheduler.allocate(attemptId, - resourceRequests, containerIds, strings, strings2); + resourceRequests, containerIds, strings, strings2, null, null); } } @@ -959,4 +961,12 @@ public Priority checkAndGetApplicationPriority(Priority priority, return Priority.newInstance(0); } + @Override + protected void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + // TODO Auto-generated method stub + + } + } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java index a4416db..48da36e 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.Resource; import 
org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -178,13 +179,13 @@ public void run() { public Allocation allocate(ApplicationAttemptId attemptId, List resourceRequests, List containerIds, - List strings, List strings2) { + List strings, List strings2, List increaseRequests, List decreaseRequests) { if (metricsON) { final Timer.Context context = schedulerAllocateTimer.time(); Allocation allocation = null; try { allocation = super.allocate(attemptId, resourceRequests, - containerIds, strings, strings2); + containerIds, strings, strings2, increaseRequests, decreaseRequests); return allocation; } finally { context.stop(); @@ -198,7 +199,7 @@ public Allocation allocate(ApplicationAttemptId attemptId, } } else { return super.allocate(attemptId, - resourceRequests, containerIds, strings, strings2); + resourceRequests, containerIds, strings, strings2, increaseRequests, decreaseRequests); } } @@ -426,7 +427,7 @@ private void tearDown() throws IOException { if (pool != null) pool.shutdown(); } - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked", "rawtypes" }) private void initMetrics() throws Exception { metrics = new MetricRegistry(); // configuration diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java index 108ad37..8d5546c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import 
org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; @@ -525,7 +526,7 @@ public MyFifoScheduler(RMContext rmContext) { public synchronized Allocation allocate( ApplicationAttemptId applicationAttemptId, List ask, List release, List blacklistAdditions, - List blacklistRemovals) { + List blacklistRemovals, List increaseRequests, List decreaseRequests) { List askCopy = new ArrayList(); for (ResourceRequest req : ask) { ResourceRequest reqCopy = @@ -539,7 +540,7 @@ public synchronized Allocation allocate( lastBlacklistAdditions = blacklistAdditions; lastBlacklistRemovals = blacklistRemovals; return super.allocate(applicationAttemptId, askCopy, release, - blacklistAdditions, blacklistRemovals); + blacklistAdditions, blacklistRemovals, increaseRequests, decreaseRequests); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java index 38fbc82..c0ccf57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java @@ -19,12 +19,13 @@ package org.apache.hadoop.yarn.server.api.protocolrecords; import java.nio.ByteBuffer; +import java.util.Collection; import java.util.List; import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; 
import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.NodeAction; @@ -73,5 +74,5 @@ void setSystemCredentialsForApps( void setAreNodeLabelsAcceptedByRM(boolean areNodeLabelsAcceptedByRM); List getContainersToDecrease(); - void addAllContainersToDecrease(List containersToDecrease); + void addAllContainersToDecrease(Collection containersToDecrease); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java index 12c5230..dc65141 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java @@ -20,14 +20,15 @@ import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; @@ -437,7 +438,7 @@ private void initContainersToDecrease() { 
@Override public void addAllContainersToDecrease( - final List containersToDecrease) { + final Collection containersToDecrease) { if (containersToDecrease == null) { return; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index c8b985d..2d8bb35 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -425,6 +425,10 @@ public boolean hasApplicationMasterRegistered( new ArrayList(); protected static final Allocation EMPTY_ALLOCATION = new Allocation( EMPTY_CONTAINER_LIST, Resources.createResource(0), null, null, null); + + private void checkDuplicatedIncreaseDecreaseRequest() { + + } @Override public AllocateResponse allocate(AllocateRequest request) @@ -508,11 +512,13 @@ public AllocateResponse allocate(AllocateRequest request) req.setNodeLabelExpression(asc.getNodeLabelExpression()); } } + + Resource maximumCapacity = rScheduler.getMaximumResourceCapability(); // sanity check try { RMServerUtils.normalizeAndValidateRequests(ask, - rScheduler.getMaximumResourceCapability(), app.getQueue(), + maximumCapacity, app.getQueue(), rScheduler, rmContext); } catch (InvalidResourceRequestException e) { LOG.warn("Invalid resource ask by application " + appAttemptId, e); @@ -526,6 +532,18 @@ public AllocateResponse allocate(AllocateRequest request) throw e; } + try { + RMServerUtils.checkDuplicatedIncreaseDecreaseRequest( + 
request.getIncreaseRequests(), request.getDecreaseRequests()); + RMServerUtils.validateIncreaseDecreaseRequest( + request.getIncreaseRequests(), maximumCapacity, true); + RMServerUtils.validateIncreaseDecreaseRequest( + request.getDecreaseRequests(), maximumCapacity, false); + } catch (InvalidResourceRequestException e) { + LOG.warn(e); + throw e; + } + // In the case of work-preserving AM restart, it's possible for the // AM to release containers from the earlier attempt. if (!app.getApplicationSubmissionContext() @@ -550,8 +568,9 @@ public AllocateResponse allocate(AllocateRequest request) allocation = EMPTY_ALLOCATION; } else { allocation = - this.rScheduler.allocate(appAttemptId, ask, release, - blacklistAdditions, blacklistRemovals); + this.rScheduler.allocate(appAttemptId, ask, release, + blacklistAdditions, blacklistRemovals, + request.getIncreaseRequests(), request.getDecreaseRequests()); } if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) { @@ -597,6 +616,10 @@ public AllocateResponse allocate(AllocateRequest request) .pullJustFinishedContainers()); allocateResponse.setResponseId(lastResponse.getResponseId() + 1); allocateResponse.setAvailableResources(allocation.getResourceLimit()); + + // Handling increased/decreased containers + allocateResponse.setIncreasedContainers(allocation.getIncreasedContainers()); + allocateResponse.setDecreasedContainers(allocation.getDecreasedContainers()); allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 3e16165..e8892ce 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -275,7 +275,6 @@ public InetSocketAddress getBindAddress() { * @param owner * @param operationPerformed * @param application - * @return */ private boolean checkAccess(UserGroupInformation callerUGI, String owner, ApplicationAccessType operationPerformed, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java index f049d97..cd9a61d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java @@ -56,6 +56,8 @@ public static final String RELEASE_CONTAINER = "AM Released Container"; public static final String UPDATE_APP_PRIORITY = "Update Application Priority Request"; + public static final String CHANGE_CONTAINER_RESOURCE = + "AM Changed Container Resource"; // Some commonly used descriptions public static final String UNAUTHORIZED_USER = "Unauthorized user"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java index 4d2e41c..7efc270 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java @@ -22,8 +22,10 @@ import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; @@ -123,6 +126,54 @@ public static void validateBlacklistRequest( } } } + + public static void checkDuplicatedIncreaseDecreaseRequest( + List incRequests, + List decRequests) + throws InvalidResourceRequestException { + String msg = "There're multiple increase or decrease container requests " + + "for same containerId="; + Set existedContainerIds = new HashSet(); + for (ContainerResourceChangeRequest r : incRequests) { + if (!existedContainerIds.add(r.getContainerId())) { + throw new InvalidResourceRequestException(msg + r.getContainerId()); + } + } + for (ContainerResourceChangeRequest r : decRequests) { + if (!existedContainerIds.add(r.getContainerId())) { + throw new InvalidResourceRequestException(msg + r.getContainerId()); 
+ } + } + } + + public static void validateIncreaseDecreaseRequest( + List requests, + Resource maximumAllocation, boolean increase) + throws InvalidResourceRequestException { + for (ContainerResourceChangeRequest request : requests) { + if (request.getCapability().getMemory() < 0 + || request.getCapability().getMemory() > maximumAllocation + .getMemory()) { + throw new InvalidResourceRequestException("Invalid " + + (increase ? "increase" : "decrease") + " request" + + ", requested memory < 0" + + ", or requested memory > max configured" + ", requestedMemory=" + + request.getCapability().getMemory() + ", maxMemory=" + + maximumAllocation.getMemory()); + } + if (request.getCapability().getVirtualCores() < 0 + || request.getCapability().getVirtualCores() > maximumAllocation + .getVirtualCores()) { + throw new InvalidResourceRequestException("Invalid " + + (increase ? "increase" : "decrease") + " request" + + ", requested virtual cores < 0" + + ", or requested virtual cores > max configured" + + ", requestedVirtualCores=" + + request.getCapability().getVirtualCores() + ", maxVirtualCores=" + + maximumAllocation.getVirtualCores()); + } + } + } /** * It will validate to make sure all the containers belong to correct diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 100e991..06a9a06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -449,6 
+449,8 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) getResponseId() + 1, NodeAction.NORMAL, null, null, null, null, nextHeartBeatInterval); rmNode.updateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse); + rmNode.updateNodeHeartbeatResposneForContainersDecreasing( + nodeHeartBeatResponse); populateKeys(request, nodeHeartBeatResponse); @@ -461,8 +463,9 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) // 4. Send status to RMNode, saving the latest response. RMNodeStatusEvent nodeStatusEvent = new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(), - remoteNodeStatus.getContainersStatuses(), - remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse); + remoteNodeStatus.getContainersStatuses(), + remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse, + remoteNodeStatus.getIncreasedContainers()); if (request.getLogAggregationReportsForApps() != null && !request.getLogAggregationReportsForApps().isEmpty()) { nodeStatusEvent.setLogAggregationReportsForApps(request diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 74a4000..f0fe39a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -944,7 +944,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, Allocation amContainerAllocation = 
appAttempt.scheduler.allocate(appAttempt.applicationAttemptId, Collections.singletonList(appAttempt.amReq), - EMPTY_CONTAINER_RELEASE_LIST, null, null); + EMPTY_CONTAINER_RELEASE_LIST, null, null, null, null); if (amContainerAllocation != null && amContainerAllocation.getContainers() != null) { assert (amContainerAllocation.getContainers().size() == 0); @@ -968,7 +968,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, Allocation amContainerAllocation = appAttempt.scheduler.allocate(appAttempt.applicationAttemptId, EMPTY_CONTAINER_REQUEST_LIST, EMPTY_CONTAINER_RELEASE_LIST, null, - null); + null, null, null); // There must be at least one container allocated, because a // CONTAINER_ALLOCATED is emitted after an RMContainer is constructed, // and is put in SchedulerApplication#newlyAllocatedContainers. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java index 21d79ee..fae8193 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java @@ -82,4 +82,8 @@ String getNodeHttpAddress(); String getNodeLabelExpression(); + + boolean hasIncreaseReservation(); + + void cancelInceaseReservation(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java new file mode 100644 index 0000000..920cfdb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; + +public class RMContainerChangeResourceEvent extends RMContainerEvent { + + final Resource targetResource; + final boolean increase; + + public RMContainerChangeResourceEvent(ContainerId containerId, + Resource targetResource, boolean increase) { + super(containerId, RMContainerEventType.CHANGE_RESOURCE); + + this.targetResource = targetResource; + this.increase = increase; + } + + public Resource getTargetResource() { + return targetResource; + } + + public boolean isIncrease() { + return increase; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java index 259d68b3..a3b4b76 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java @@ -25,6 +25,10 @@ ACQUIRED, KILL, // Also from Node on NodeRemoval RESERVED, + + // when a container acquired by AM after + // it increased/decreased + ACQUIRE_UPDATED_CONTAINER, LAUNCHED, FINISHED, @@ -35,5 +39,12 @@ // Source: ContainerAllocationExpirer EXPIRE, - RECOVER + RECOVER, + + // Source: Scheduler + // Resource change approved by scheduler + CHANGE_RESOURCE, + + // NM reported resource change is done + NM_DONE_CHANGE_RESOURCE } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index a3d8bee..4b091a7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -118,7 +118,18 @@ .addTransition(RMContainerState.RUNNING, RMContainerState.RELEASED, RMContainerEventType.RELEASED, new KillTransition()) .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, - RMContainerEventType.EXPIRE) + RMContainerEventType.RESERVED, new ContainerReservedTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.EXPIRED, + RMContainerEventType.EXPIRE, + new ContainerExpiredWhileRunningTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, + RMContainerEventType.CHANGE_RESOURCE, new ChangeResourceTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, + RMContainerEventType.ACQUIRE_UPDATED_CONTAINER, + new ContainerAcquiredWhileRunningTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, + RMContainerEventType.NM_DONE_CHANGE_RESOURCE, + new NMReportedContainerChangeIsDoneTransition()) // Transitions from COMPLETED state .addTransition(RMContainerState.COMPLETED, RMContainerState.COMPLETED, @@ -140,9 +151,7 @@ RMContainerEventType.KILL, RMContainerEventType.FINISHED)) // create the topology tables - .installTopology(); - - + .installTopology(); private final StateMachine stateMachine; 
@@ -166,6 +175,10 @@ private ContainerStatus finishedStatus; private boolean isAMContainer; private List resourceRequests; + + private boolean containerIncreased = false; + private boolean containerIncreasedAndAcquired = false; + private volatile boolean hasIncreaseReservation = false; public RMContainerImpl(Container container, ApplicationAttemptId appAttemptId, NodeId nodeId, String user, @@ -471,8 +484,8 @@ public RMContainerState transition(RMContainerImpl container, } } - private static final class ContainerReservedTransition extends - BaseTransition { + private static final class ContainerReservedTransition + extends BaseTransition { @Override public void transition(RMContainerImpl container, RMContainerEvent event) { @@ -480,6 +493,10 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { container.reservedResource = e.getReservedResource(); container.reservedNode = e.getReservedNode(); container.reservedPriority = e.getReservedPriority(); + + if (container.getState() != RMContainerState.NEW) { + container.hasIncreaseReservation = true; + } } } @@ -509,6 +526,80 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { .getApplicationAttemptId().getApplicationId(), container.nodeId)); } } + + private static final class ContainerAcquiredWhileRunningTransition extends + BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + if (container.containerIncreased) { + // If container is increased but not acquired by AM, we will start + // containerAllocationExpirer for this container in this transition. 
+ container.containerAllocationExpirer.register(event.getContainerId()); + container.containerIncreasedAndAcquired = true; + } + } + } + + private static final class NMReportedContainerChangeIsDoneTransition + extends BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + if (container.containerIncreased) { + // If container is increased but not acquired by AM, we will start + // containerAllocationExpirer for this container in this transition. + container.containerAllocationExpirer.unregister(event.getContainerId()); + container.containerIncreasedAndAcquired = false; + } + } + } + + private static final class ContainerExpiredWhileRunningTransition extends + BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + if (container.containerIncreasedAndAcquired) { + // When the container expired, and it has a pending increased request, we + // will kill the container. + new KillTransition().transition(container, event); + // TODO, we can do better for this: roll back container resource to the + // resource before increase, and notify scheduler about this decrease as + // well. Will do that in a separated JIRA. + container.containerIncreasedAndAcquired = false; + } + } + } + + private static final class ChangeResourceTransition extends BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + RMContainerChangeResourceEvent changeEvent = (RMContainerChangeResourceEvent)event; + + // Register with containerAllocationExpirer. + // For now, we assume timeout for increase is as same as container + // allocation. 
+ if (changeEvent.isIncrease()) { + container.containerIncreased = true; + } else { + // if this is a decrease request, if container was increased but not + // told to NM, we can consider previous increase is cancelled, + // unregister from the containerAllocationExpirer + container.containerAllocationExpirer.unregister(container + .getContainerId()); + container.containerIncreased = false; + container.containerIncreasedAndAcquired = false; + } + + container.container.setResource(changeEvent.getTargetResource()); + + // We reach here means we either allocated increase reservation OR + // decreased container, reservation will be cancelled anyway. + container.hasIncreaseReservation = false; + } + } private static final class ContainerRescheduledTransition extends FinishedTransition { @@ -665,4 +756,14 @@ public int compareTo(RMContainer o) { } return -1; } + + @Override + public boolean hasIncreaseReservation() { + return hasIncreaseReservation; + } + + @Override + public void cancelInceaseReservation() { + hasIncreaseReservation = false; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java index 00cd3b6..eba5075 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java @@ -24,6 +24,7 @@ import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; 
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; @@ -149,4 +150,12 @@ * @return labels in this node */ public Set getNodeLabels(); + + /** + * Update containers to be decreased + */ + public void updateNodeHeartbeatResposneForContainersDecreasing( + NodeHeartbeatResponse response); + + public List pullNewlyIncreasedContainers(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java new file mode 100644 index 0000000..62925ad --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java @@ -0,0 +1,39 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.yarn.server.resourcemanager.rmnode; + +import java.util.List; + +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.NodeId; + +public class RMNodeDecreaseContainerEvent extends RMNodeEvent { + final List toBeDecreasedContainers; + + public RMNodeDecreaseContainerEvent(NodeId nodeId, + List toBeDecreasedContainers) { + super(nodeId, RMNodeEventType.DECREASE_CONTAINER); + + this.toBeDecreasedContainers = toBeDecreasedContainers; + } + + public List getToBeDecreasedContainers() { + return toBeDecreasedContainers; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java index 27ba1c0..a68c894 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java @@ -42,6 +42,7 @@ // Source: Container CONTAINER_ALLOCATED, CLEANUP_CONTAINER, + DECREASE_CONTAINER, // Source: RMAppAttempt FINISHED_CONTAINERS_PULLED_BY_AM, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 7a1ba74..e6a6e2f 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -20,8 +20,11 @@ import java.util.ArrayList; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentLinkedQueue; @@ -36,6 +39,7 @@ import org.apache.hadoop.net.Node; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; @@ -131,6 +135,12 @@ /* the list of applications that are running on this node */ private final List runningApplications = new ArrayList(); + + private final Map toBeDecreasedContainers = + new HashMap<>(); + + private final Map nmReportedIncreasedContainers = + new HashMap<>(); private NodeHeartbeatResponse latestNodeHeartBeatResponse = recordFactory .newRecordInstance(NodeHeartbeatResponse.class); @@ -178,6 +188,9 @@ .addTransition(NodeState.RUNNING, NodeState.SHUTDOWN, RMNodeEventType.SHUTDOWN, new DeactivateNodeTransition(NodeState.SHUTDOWN)) + .addTransition(NodeState.RUNNING, NodeState.RUNNING, + RMNodeEventType.DECREASE_CONTAINER, + new DecreaseContainersTransition()) //Transitions from REBOOTED state .addTransition(NodeState.REBOOTED, NodeState.REBOOTED, @@ -430,6 +443,19 @@ public void updateNodeHeartbeatResponseForCleanup(NodeHeartbeatResponse response this.writeLock.unlock(); } }; + + @Override + public void 
updateNodeHeartbeatResposneForContainersDecreasing( + NodeHeartbeatResponse response) { + this.writeLock.lock(); + + try { + response.addAllContainersToDecrease(toBeDecreasedContainers.values()); + toBeDecreasedContainers.clear(); + } finally { + this.writeLock.unlock(); + } + } @Override public NodeHeartbeatResponse getLastNodeHeartBeatResponse() { @@ -759,6 +785,19 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeFinishedContainersPulledByAMEvent) event).getContainers()); } } + + public static class DecreaseContainersTransition + implements SingleArcTransition { + + @Override + public void transition(RMNodeImpl rmNode, RMNodeEvent event) { + RMNodeDecreaseContainerEvent de = (RMNodeDecreaseContainerEvent) event; + + for (Container c : de.getToBeDecreasedContainers()) { + rmNode.toBeDecreasedContainers.put(c.getId(), c); + } + } + } public static class DeactivateNodeTransition implements SingleArcTransition { @@ -827,6 +866,8 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { } rmNode.handleContainerStatus(statusEvent.getContainers()); + rmNode.handleReportedIncreasedContainers( + statusEvent.getNMReportedIncreasedContainers()); List logAggregationReportsForApps = statusEvent.getLogAggregationReportsForApps(); @@ -919,6 +960,34 @@ public int getQueueSize() { } return nlm.getLabelsOnNode(nodeId); } + + private void handleReportedIncreasedContainers( + List reportedIncreasedContainers) { + for (Container container : reportedIncreasedContainers) { + ContainerId containerId = container.getId(); + + // Don't bother with containers already scheduled for cleanup, or for + // applications already killed. 
The scheduler doens't need to know any + // more about this container + if (containersToClean.contains(containerId)) { + LOG.info("Container " + containerId + " already scheduled for " + + "cleanup, no further processing"); + continue; + } + + ApplicationId containerAppId = + containerId.getApplicationAttemptId().getApplicationId(); + + if (finishedApplications.contains(containerAppId)) { + LOG.info("Container " + containerId + + " belongs to an application that is already killed," + + " no further processing"); + continue; + } + + this.nmReportedIncreasedContainers.put(containerId, container); + } + } private void handleContainerStatus(List containerStatuses) { // Filter the map to only obtain just launched containers and finished @@ -989,4 +1058,10 @@ private void handleLogAggregationStatus( } } + @Override + public List pullNewlyIncreasedContainers() { + // TODO Auto-generated method stub + return null; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java index b95d7d3..8323f3c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java @@ -18,8 +18,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmnode; +import java.util.Collections; import java.util.List; + import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; 
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; @@ -33,28 +36,36 @@ private final NodeHeartbeatResponse latestResponse; private final List keepAliveAppIds; private List logAggregationReportsForApps; - + private final List nmReportedIncreasedContainers; + + // Used by tests public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, List collection, List keepAliveAppIds, NodeHeartbeatResponse latestResponse) { - super(nodeId, RMNodeEventType.STATUS_UPDATE); - this.nodeHealthStatus = nodeHealthStatus; - this.containersCollection = collection; - this.keepAliveAppIds = keepAliveAppIds; - this.latestResponse = latestResponse; - this.logAggregationReportsForApps = null; + this(nodeId, nodeHealthStatus, collection, keepAliveAppIds, + latestResponse, null); } public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, List collection, List keepAliveAppIds, NodeHeartbeatResponse latestResponse, - List logAggregationReportsForApps) { + List nmReportedIncreasedContainers) { + this(nodeId, nodeHealthStatus, collection, keepAliveAppIds, latestResponse, + null, nmReportedIncreasedContainers); + } + + public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, + List collection, List keepAliveAppIds, + NodeHeartbeatResponse latestResponse, + List logAggregationReportsForApps, + List nmReportedIncreasedContainers) { super(nodeId, RMNodeEventType.STATUS_UPDATE); this.nodeHealthStatus = nodeHealthStatus; this.containersCollection = collection; this.keepAliveAppIds = keepAliveAppIds; this.latestResponse = latestResponse; this.logAggregationReportsForApps = logAggregationReportsForApps; + this.nmReportedIncreasedContainers = nmReportedIncreasedContainers; } public NodeHealthStatus getNodeHealthStatus() { @@ -81,4 +92,9 @@ public void setLogAggregationReportsForApps( List logAggregationReportsForApps) { this.logAggregationReportsForApps = 
logAggregationReportsForApps; } + + public List getNMReportedIncreasedContainers() { + return nmReportedIncreasedContainers == null ? Collections.EMPTY_LIST + : nmReportedIncreasedContainers; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index ed05189..7a378db 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -19,7 +19,16 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -37,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import 
org.apache.hadoop.yarn.api.records.NodeId; @@ -58,13 +68,16 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMoveEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeDecreaseContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.annotations.VisibleForTesting; @@ -87,7 +100,7 @@ protected Resource clusterResource = Resource.newInstance(0, 0); protected Resource minimumAllocation; - private Resource maximumAllocation; + protected Resource maximumAllocation; private Resource configuredMaximumAllocation; private int maxNodeMemory = -1; private int maxNodeVCores = -1; @@ -228,6 +241,53 @@ protected synchronized void containerLaunchedOnNode( application.containerLaunchedOnNode(containerId, node.getNodeID()); } + + protected synchronized void containerIncreasedOnNode(ContainerId containerId, + SchedulerNode node, Container increasedContainerReportedByNM) { + // Get the application for the finished 
container + SchedulerApplicationAttempt application = + getCurrentAttemptForContainer(containerId); + if (application == null) { + LOG.info("Unknown application " + + containerId.getApplicationAttemptId().getApplicationId() + + " increased container " + containerId + " on node: " + node); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId)); + return; + } + + RMContainer rmContainer = getRMContainer(containerId); + Resource rmContainerResource = rmContainer.getAllocatedResource(); + Resource nmContainerResource = increasedContainerReportedByNM.getResource(); + + + if (Resources.equals(nmContainerResource, rmContainerResource)){ + // NM reported expected container size, tell RMContainer. Which will stop + // container expire monitor + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.NM_DONE_CHANGE_RESOURCE)); + } else if (Resources.fitsIn(nmContainerResource, rmContainerResource)) { + // when rmContainerResource >= nmContainerResource, we won't do anything, + // it is possible a container increased is issued by RM, but AM hasn't + // told NM. + } else if (Resources.fitsIn(rmContainerResource, nmContainerResource)) { + // When rmContainerResource <= nmContainerResource, it could happen when a + // container decreased by RM before it is increased in NM. 
+ + // Tell NM to decrease the container + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeDecreaseContainerEvent(node.getNodeID(), + Arrays.asList(rmContainer.getContainer()))); + } else { + // Something wrong happened, kill the container + LOG.warn("Something wrong happened, container size reported by NM" + + " is not expected, ContainerID=" + containerId + + " rm-size-resource:" + rmContainerResource + " nm-size-reosurce:" + + nmContainerResource); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId)); + } + } public T getApplicationAttempt(ApplicationAttemptId applicationAttemptId) { SchedulerApplication app = @@ -508,6 +568,39 @@ protected void releaseContainers(List containers, SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED); } } + + protected void decreaseContainers( + List decreaseRequests, + SchedulerApplicationAttempt attempt) { + List normalizedRequests = + checkAndNormalizeContainerChangeRequests(attempt, decreaseRequests, + false); + for (SchedulerContainerResourceChangeRequest request : normalizedRequests) { + if (LOG.isDebugEnabled()) { + LOG.debug("Processing decrease request:" + request); + } + + boolean hasIncreaseRequest = + attempt.removeIncreaseRequest(request.getNodeId(), + request.getPriority(), request.getContainerId()); + + if (hasIncreaseRequest) { + if (LOG.isDebugEnabled()) { + LOG.debug("While processing decrease request, found a increase request " + + "for the same container " + + request.getContainerId() + + ", removed the increase request"); + } + } + + // handle decrease request + decreaseContainer(request, attempt); + } + } + + protected abstract void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt); public SchedulerNode getSchedulerNode(NodeId nodeId) { return nodes.get(nodeId); @@ -708,4 +801,107 @@ public void updateApplicationPriority(Priority 
newPriority, // Dummy Implementation till Application Priority changes are done in // specific scheduler. } + + /** + * Normalize container increase/decrease request, and return + * SchedulerContainerResourceChangeRequest according to given + * ContainerResourceChangeRequest. + * + *
+   * - Returns non-null value means validation succeeded
+   * - Throw exception when any other error happens
+   * 
+ */ + private SchedulerContainerResourceChangeRequest + checkAndNormalizeContainerChangeRequest( + SchedulerApplicationAttempt attempt, + ContainerResourceChangeRequest request, boolean increase) + throws YarnException { + ContainerId containerId = request.getContainerId(); + RMContainer rmContainer = getRMContainer(containerId); + if (null == rmContainer) { + String msg = + "Failed to get rmContainer for " + + (increase ? "increase" : "decrease") + + " request, with container-id=" + containerId; + throw new YarnException(msg); + } + + if (rmContainer.getState() != RMContainerState.RUNNING) { + String msg = + "rmContainer's state is not RUNNING, for " + + (increase ? "increase" : "decrease") + + " request, with container-id=" + containerId; + throw new YarnException(msg); + } + + Resource targetResource = + Resources.normalize(getResourceCalculator(), request.getCapability(), + minimumAllocation, maximumAllocation, minimumAllocation); + + // Compare targetResource and original resource + Resource originalResource = rmContainer.getAllocatedResource(); + + // Resource comparasion should be >= (or <=) for all resource vectors, for + // example, you cannot request target resource of a <10G, 10> container to + // <20G, 8> + if (increase) { + if (originalResource.getMemory() > targetResource.getMemory() + || originalResource.getVirtualCores() > targetResource + .getVirtualCores()) { + String msg = + "Trying to increase a container, but target resource has some" + + " resource < original resource, target=" + targetResource + + " original=" + originalResource + " containerId=" + + containerId; + throw new YarnException(msg); + } + } else { + if (originalResource.getMemory() < targetResource.getMemory() + || originalResource.getVirtualCores() < targetResource + .getVirtualCores()) { + String msg = + "Trying to decrease a container, but target resource has " + + "some resource > original resource, target=" + targetResource + + " original=" + originalResource + " 
containerId=" + + containerId; + throw new YarnException(msg); + } + } + + // Update normalized target resource + request.setCapability(targetResource); + return new SchedulerContainerResourceChangeRequest( + getSchedulerNode(rmContainer.getAllocatedNode()), rmContainer, + targetResource); + } + + protected List + checkAndNormalizeContainerChangeRequests( + SchedulerApplicationAttempt attempt, + List changeRequests, + boolean increase) { + if (null == changeRequests || changeRequests.isEmpty()) { + return Collections.EMPTY_LIST; + } + + List schedulerChangeRequests = + new ArrayList(); + for (ContainerResourceChangeRequest r : changeRequests) { + SchedulerContainerResourceChangeRequest sr = null; + try { + sr = checkAndNormalizeContainerChangeRequest(attempt, r, increase); + } catch (YarnException e) { + LOG.warn("Error happens when checking increase request:" + e + + ". Ignoring this request"); + continue; + } + if (null == sr) { + continue; + } + schedulerChangeRequests.add(sr); + } + + return schedulerChangeRequests; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java index 3f2d8af..af6caad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java @@ -34,6 +34,9 @@ final Set fungibleContainers; final List fungibleResources; final List nmTokens; + final List increasedContainers; + final List decreasedContainers; + public Allocation(List containers, Resource 
resourceLimit, Set strictContainers, Set fungibleContainers, @@ -45,12 +48,22 @@ public Allocation(List containers, Resource resourceLimit, public Allocation(List containers, Resource resourceLimit, Set strictContainers, Set fungibleContainers, List fungibleResources, List nmTokens) { + this(containers, resourceLimit,strictContainers, fungibleContainers, + fungibleResources, nmTokens, null, null); + } + + public Allocation(List containers, Resource resourceLimit, + Set strictContainers, Set fungibleContainers, + List fungibleResources, List nmTokens, + List increasedContainers, List decreasedContainer) { this.containers = containers; this.resourceLimit = resourceLimit; this.strictContainers = strictContainers; this.fungibleContainers = fungibleContainers; this.fungibleResources = fungibleResources; this.nmTokens = nmTokens; + this.increasedContainers = increasedContainers; + this.decreasedContainers = decreasedContainer; } public List getContainers() { @@ -76,5 +89,12 @@ public Resource getResourceLimit() { public List getNMTokens() { return nmTokens; } - + + public List getIncreasedContainers() { + return increasedContainers; + } + + public List getDecreasedContainers() { + return decreasedContainers; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 77ac5b3..d88442a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -20,10 +20,12 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; @@ -35,6 +37,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -63,8 +67,11 @@ final Set priorities = new TreeSet( new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator()); - final Map> requests = - new ConcurrentHashMap>(); + final Map> resourceRequestMap = + new ConcurrentHashMap>(); + final Map>> increaseRequestMap = + new ConcurrentHashMap<>(); private Set blacklist = new HashSet(); //private final ApplicationStore store; @@ -113,13 +120,177 @@ public synchronized boolean isPending() { */ private synchronized void clearRequests() { priorities.clear(); - requests.clear(); + resourceRequestMap.clear(); LOG.info("Application " + applicationId + " requests cleared"); } public long getNewContainerId() { return this.containerIdCounter.incrementAndGet(); } + + public boolean hasIncreaseRequest(NodeId nodeId) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return false; + } + return requestsOnNode.size() > 0; + } + + public Map + getIncreaseRequests(NodeId nodeId, Priority priority) { + 
Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return null; + } + + return requestsOnNode.get(priority); + } + + public synchronized boolean updateIncreaseRequests( + List increaseRequests) { + boolean resourceUpdated = false; + + for (SchedulerContainerResourceChangeRequest r : increaseRequests) { + NodeId nodeId = r.getRMContainer().getAllocatedNode(); + + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + requestsOnNode = new TreeMap<>(); + increaseRequestMap.put(nodeId, requestsOnNode); + } + + SchedulerContainerResourceChangeRequest originalResource = + getIncreaseRequest(nodeId, r.getPriority(), r.getContainerId()); + if (null != originalResource) { + if (Resources.equals(originalResource.getTargetCapacity(), + r.getTargetCapacity())) { + // New target capacity is as same as what we have, just ignore the new + // one + continue; + } + + // remove the old one + removeIncreaseRequest(nodeId, originalResource.getPriority(), + originalResource.getContainerId()); + } + + if (Resources.equals(r.getTargetCapacity(), r.getRMContainer().getAllocatedResource())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Trying to increase/decrease container, " + + "target capacity = original capacity = " + originalResource + + " for container=" + r.getContainerId() + + ". 
Will ignore this increase request"); + } + continue; + } + + // add the new one + resourceUpdated = true; + insertIncreaseRequest(r); + } + return resourceUpdated; + } + + // insert increase request and add missing hierarchy if missing + private void insertIncreaseRequest(SchedulerContainerResourceChangeRequest request) { + NodeId nodeId = request.getNodeId(); + Priority priority = request.getPriority(); + ContainerId containerId = request.getContainerId(); + + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + requestsOnNode = + new HashMap>(); + increaseRequestMap.put(nodeId, requestsOnNode); + } + + Map requestsOnNodeWithPriority = + requestsOnNode.get(priority); + if (null == requestsOnNodeWithPriority) { + requestsOnNodeWithPriority = + new TreeMap(); + requestsOnNode.put(priority, requestsOnNodeWithPriority); + } + + requestsOnNodeWithPriority.put(containerId, request); + + // update resources + String partition = request.getRMContainer().getNodeLabelExpression(); + Resource delta = request.getDeltaCapacity(); + appResourceUsage.incPending(partition, delta); + queue.incPendingResource(partition, delta); + + if (LOG.isDebugEnabled()) { + LOG.debug("Added increase request:" + request.getContainerId() + + " delta=" + request.getDeltaCapacity()); + } + + // update priorities + priorities.add(priority); + } + + public synchronized boolean removeIncreaseRequest(NodeId nodeId, Priority priority, + ContainerId containerId) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return false; + } + + Map requestsOnNodeWithPriority = + requestsOnNode.get(priority); + if (null == requestsOnNodeWithPriority) { + return false; + } + + SchedulerContainerResourceChangeRequest request = + requestsOnNodeWithPriority.remove(containerId); + + // remove hierarchies if it becomes empty + if (requestsOnNodeWithPriority.isEmpty()) { + requestsOnNode.remove(priority); + } + if (requestsOnNode.isEmpty()) { + 
increaseRequestMap.remove(nodeId); + } + + if (request == null) { + return false; + } + + // update queue's pending resource if request exists + String partition = request.getRMContainer().getNodeLabelExpression(); + Resource delta = request.getDeltaCapacity(); + appResourceUsage.decPending(partition, delta); + queue.decPendingResource(partition, delta); + + if (LOG.isDebugEnabled()) { + LOG.debug("remove increase request:" + request); + } + + return true; + } + + public SchedulerContainerResourceChangeRequest getIncreaseRequest(NodeId nodeId, + Priority priority, ContainerId containerId) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return null; + } + + Map requestsOnNodeWithPriority = + requestsOnNode.get(priority); + if (null == requestsOnNodeWithPriority) { + return null; + } + + return requestsOnNodeWithPriority.get(containerId); + } /** * The ApplicationMaster is updating resource requirements for the @@ -162,11 +333,11 @@ synchronized public boolean updateResourceRequests( } } - Map asks = this.requests.get(priority); + Map asks = this.resourceRequestMap.get(priority); if (asks == null) { asks = new ConcurrentHashMap(); - this.requests.put(priority, asks); + this.resourceRequestMap.put(priority, asks); this.priorities.add(priority); } lastRequest = asks.get(resourceName); @@ -241,12 +412,12 @@ synchronized public void updateBlacklist( synchronized public Map getResourceRequests( Priority priority) { - return requests.get(priority); + return resourceRequestMap.get(priority); } public List getAllResourceRequests() { List ret = new ArrayList(); - for (Map r : requests.values()) { + for (Map r : resourceRequestMap.values()) { ret.addAll(r.values()); } return ret; @@ -254,7 +425,7 @@ synchronized public void updateBlacklist( synchronized public ResourceRequest getResourceRequest(Priority priority, String resourceName) { - Map nodeRequests = requests.get(priority); + Map nodeRequests = 
resourceRequestMap.get(priority); return (nodeRequests == null) ? null : nodeRequests.get(resourceName); } @@ -267,6 +438,50 @@ public synchronized boolean isBlacklisted(String resourceName) { return blacklist.contains(resourceName); } + public synchronized void increaseContainer( + SchedulerContainerResourceChangeRequest increaseRequest) { + NodeId nodeId = increaseRequest.getNodeId(); + Priority priority = increaseRequest.getPriority(); + ContainerId containerId = increaseRequest.getContainerId(); + + if (LOG.isDebugEnabled()) { + LOG.debug("allocated increase request : applicationId=" + applicationId + + " container=" + containerId + " host=" + + increaseRequest.getNodeId() + " user=" + user + " resource=" + + increaseRequest.getDeltaCapacity()); + } + + // Set queue metrics + queue.getMetrics().allocateResources(user, 0, + increaseRequest.getDeltaCapacity(), true); + + // remove the increase request from pending increase request map + removeIncreaseRequest(nodeId, priority, containerId); + + // update usage + appResourceUsage.incUsed(increaseRequest.getNodePartition(), + increaseRequest.getDeltaCapacity()); + } + + public synchronized void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest) { + // Delta is negative when it's a decrease request + Resource absDelta = Resources.negate(decreaseRequest.getDeltaCapacity()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Decrease container : applicationId=" + applicationId + + " container=" + decreaseRequest.getContainerId() + " host=" + + decreaseRequest.getNodeId() + " user=" + user + " resource=" + + absDelta); + } + + // Set queue metrics + queue.getMetrics().releaseResources(user, 0, absDelta); + + // update usage + appResourceUsage.decUsed(decreaseRequest.getNodePartition(), absDelta); + } + /** * Resources have been allocated to this application by the resource * scheduler. Track them. 
@@ -325,11 +540,11 @@ synchronized private void allocateNodeLocal(SchedulerNode node, // Update future requirements decResourceRequest(node.getNodeName(), priority, nodeLocalRequest); - ResourceRequest rackLocalRequest = requests.get(priority).get( + ResourceRequest rackLocalRequest = resourceRequestMap.get(priority).get( node.getRackName()); decResourceRequest(node.getRackName(), priority, rackLocalRequest); - ResourceRequest offRackRequest = requests.get(priority).get( + ResourceRequest offRackRequest = resourceRequestMap.get(priority).get( ResourceRequest.ANY); decrementOutstanding(offRackRequest); @@ -343,7 +558,7 @@ private void decResourceRequest(String resourceName, Priority priority, ResourceRequest request) { request.setNumContainers(request.getNumContainers() - 1); if (request.getNumContainers() == 0) { - requests.get(priority).remove(resourceName); + resourceRequestMap.get(priority).remove(resourceName); } } @@ -360,7 +575,7 @@ synchronized private void allocateRackLocal(SchedulerNode node, // Update future requirements decResourceRequest(node.getRackName(), priority, rackLocalRequest); - ResourceRequest offRackRequest = requests.get(priority).get( + ResourceRequest offRackRequest = resourceRequestMap.get(priority).get( ResourceRequest.ANY); decrementOutstanding(offRackRequest); @@ -415,6 +630,12 @@ synchronized private void checkForDeactivation() { } } } + + // also we need to check increase request + if (!deactivate) { + deactivate = increaseRequestMap.isEmpty(); + } + if (deactivate) { activeUsersManager.deactivateApplication(user, applicationId); } @@ -423,7 +644,7 @@ synchronized private void checkForDeactivation() { synchronized public void move(Queue newQueue) { QueueMetrics oldMetrics = queue.getMetrics(); QueueMetrics newMetrics = newQueue.getMetrics(); - for (Map asks : requests.values()) { + for (Map asks : resourceRequestMap.values()) { ResourceRequest request = asks.get(ResourceRequest.ANY); if (request != null) { 
oldMetrics.decrPendingResources(user, request.getNumContainers(), @@ -450,7 +671,7 @@ synchronized public void move(Queue newQueue) { synchronized public void stop(RMAppAttemptState rmAppAttemptFinalState) { // clear pending resources metrics for the application QueueMetrics metrics = queue.getMetrics(); - for (Map asks : requests.values()) { + for (Map asks : resourceRequestMap.values()) { ResourceRequest request = asks.get(ResourceRequest.ANY); if (request != null) { metrics.decrPendingResources(user, request.getNumContainers(), @@ -513,4 +734,10 @@ public ResourceRequest cloneResourceRequest(ResourceRequest request) { request.getRelaxLocality(), request.getNodeLabelExpression()); return newRequest; } + + public synchronized void notifyContainerStopped(RMContainer rmContainer) { + // remove from pending increase request map if it exists + removeIncreaseRequest(rmContainer.getAllocatedNode(), + rmContainer.getAllocatedPriority(), rmContainer.getContainerId()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 09fd73e..d94b621 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -373,17 +373,20 @@ public void decrPendingResources(String user, int containers, Resource res) { } private void _decrPendingResources(int containers, Resource res) { + // if #container = 0, means change container resource pendingContainers.decr(containers); - 
pendingMB.decr(res.getMemory() * containers); - pendingVCores.decr(res.getVirtualCores() * containers); + pendingMB.decr(res.getMemory() * Math.max(containers, 1)); + pendingVCores.decr(res.getVirtualCores() * Math.max(containers, 1)); } public void allocateResources(String user, int containers, Resource res, boolean decrPending) { + // if #containers = 0, means change container resource allocatedContainers.incr(containers); aggregateContainersAllocated.incr(containers); - allocatedMB.incr(res.getMemory() * containers); - allocatedVCores.incr(res.getVirtualCores() * containers); + + allocatedMB.incr(res.getMemory() * Math.max(containers, 1)); + allocatedVCores.incr(res.getVirtualCores() * Math.max(containers, 1)); if (decrPending) { _decrPendingResources(containers, res); } @@ -397,10 +400,11 @@ public void allocateResources(String user, int containers, Resource res, } public void releaseResources(String user, int containers, Resource res) { + // if #container = 0, means change container resource. 
allocatedContainers.decr(containers); aggregateContainersReleased.incr(containers); - allocatedMB.decr(res.getMemory() * containers); - allocatedVCores.decr(res.getVirtualCores() * containers); + allocatedMB.decr(res.getMemory() * Math.max(containers, 1)); + allocatedVCores.decr(res.getVirtualCores() * Math.max(containers, 1)); QueueMetrics userMetrics = getUserMetrics(user); if (userMetrics != null) { userMetrics.releaseResources(user, containers, res); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java index 519de98..96288f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java @@ -28,7 +28,7 @@ private Queue queue; private final String user; - private T currentAttempt; + private volatile T currentAttempt; private volatile Priority priority; public SchedulerApplication(Queue queue, String user) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 4872543..24ea326 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -19,11 +19,13 @@ import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; @@ -51,6 +53,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerChangeResourceEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; @@ -58,9 +61,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.SchedulableEntity; +import org.apache.hadoop.yarn.state.InvalidStateTransitionException; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.SchedulableEntity; import 
com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -104,8 +108,10 @@ private AtomicLong firstAllocationRequestSentTime = new AtomicLong(0); private AtomicLong firstContainerAllocatedTime = new AtomicLong(0); - protected List newlyAllocatedContainers = - new ArrayList(); + protected List newlyAllocatedContainers = new ArrayList<>(); + protected Map newlyDecreasedContainers = new HashMap<>(); + protected Map newlyIncreasedContainers = new HashMap<>(); + protected Set updatedNMTokens = new HashSet<>(); // This pendingRelease is used in work-preserving recovery scenario to keep // track of the AM's outstanding release requests. RM on recovery could @@ -219,7 +225,8 @@ public long getNewContainerId() { return appSchedulingInfo.getPriorities(); } - public synchronized ResourceRequest getResourceRequest(Priority priority, String resourceName) { + public synchronized ResourceRequest getResourceRequest(Priority priority, + String resourceName) { return this.appSchedulingInfo.getResourceRequest(priority, resourceName); } @@ -324,24 +331,28 @@ public synchronized boolean isStopped() { return reservedContainers; } - public synchronized RMContainer reserve(SchedulerNode node, Priority priority, - RMContainer rmContainer, Container container) { - // Create RMContainer if necessary - if (rmContainer == null) { - rmContainer = - new RMContainerImpl(container, getApplicationAttemptId(), - node.getNodeID(), appSchedulingInfo.getUser(), rmContext); + public synchronized boolean reserveIncreasedContainer(SchedulerNode node, + Priority priority, RMContainer rmContainer, Resource reservedResource) { + if (commonReserve(node, priority, rmContainer, reservedResource)) { attemptResourceUsage.incReserved(node.getPartition(), - container.getResource()); - - // Reset the re-reservation count - resetReReservations(priority); - } else { - // Note down the re-reservation - addReReservation(priority); + reservedResource); + // succeeded + return true; 
+ } + + return false; + } + + private synchronized boolean commonReserve(SchedulerNode node, + Priority priority, RMContainer rmContainer, Resource reservedResource) { + try { + rmContainer.handle(new RMContainerReservedEvent(rmContainer + .getContainerId(), reservedResource, node.getNodeID(), priority)); + } catch (InvalidStateTransitionException e) { + // We reach here could be caused by container already finished, return + // false indicate it fails + return false; } - rmContainer.handle(new RMContainerReservedEvent(container.getId(), - container.getResource(), node.getNodeID(), priority)); Map reservedContainers = this.reservedContainers.get(priority); @@ -356,8 +367,30 @@ public synchronized RMContainer reserve(SchedulerNode node, Priority priority, + " reserved container " + rmContainer + " on node " + node + ". This attempt currently has " + reservedContainers.size() + " reserved containers at priority " + priority - + "; currentReservation " + container.getResource()); + + "; currentReservation " + reservedResource); } + + return true; + } + + public synchronized RMContainer reserve(SchedulerNode node, + Priority priority, RMContainer rmContainer, Container container) { + // Create RMContainer if necessary + if (rmContainer == null) { + rmContainer = + new RMContainerImpl(container, getApplicationAttemptId(), + node.getNodeID(), appSchedulingInfo.getUser(), rmContext); + attemptResourceUsage.incReserved(node.getPartition(), + container.getResource()); + + // Reset the re-reservation count + resetReReservations(priority); + } else { + // Note down the re-reservation + addReReservation(priority); + } + + commonReserve(node, priority, rmContainer, container.getResource()); return rmContainer; } @@ -456,57 +489,100 @@ public ContainersAndNMTokensAllocation(List containerList, return nmTokenList; } } + + private Container updateContainerAndNMToken(RMContainer rmContainer, + boolean newContainer) { + Container container = rmContainer.getContainer(); + 
ContainerType containerType = ContainerType.TASK; + // The working knowledge is that masterContainer for AM is null as it + // itself is the master container. + RMAppAttempt appAttempt = + rmContext + .getRMApps() + .get( + container.getId().getApplicationAttemptId() + .getApplicationId()).getCurrentAppAttempt(); + if (appAttempt.getMasterContainer() == null + && appAttempt.getSubmissionContext().getUnmanagedAM() == false) { + containerType = ContainerType.APPLICATION_MASTER; + } + try { + // create container token and NMToken altogether. + container.setContainerToken(rmContext.getContainerTokenSecretManager() + .createContainerToken(container.getId(), container.getNodeId(), + getUser(), container.getResource(), container.getPriority(), + rmContainer.getCreationTime(), this.logAggregationContext, + rmContainer.getNodeLabelExpression(), containerType)); + NMToken nmToken = + rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), + getApplicationAttemptId(), container); + if (nmToken != null) { + updatedNMTokens.add(nmToken); + } + } catch (IllegalArgumentException e) { + // DNS might be down, skip returning this container. + LOG.error("Error trying to assign container token and NM token to" + + " an updated container " + container.getId(), e); + return null; + } + + if (newContainer) { + rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), + RMContainerEventType.ACQUIRED)); + } else { + rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), + RMContainerEventType.ACQUIRE_UPDATED_CONTAINER)); + } + return container; + } - // Create container token and NMToken altogether, if either of them fails for + // Create container token and update NMToken altogether, if either of them fails for // some reason like DNS unavailable, do not return this container and keep it // in the newlyAllocatedContainers waiting to be refetched. 
- public synchronized ContainersAndNMTokensAllocation - pullNewlyAllocatedContainersAndNMTokens() { + public synchronized List pullNewlyAllocatedContainers() { List returnContainerList = new ArrayList(newlyAllocatedContainers.size()); - List nmTokens = new ArrayList(); for (Iterator i = newlyAllocatedContainers.iterator(); i - .hasNext();) { + .hasNext();) { RMContainer rmContainer = i.next(); - Container container = rmContainer.getContainer(); - ContainerType containerType = ContainerType.TASK; - // The working knowledge is that masterContainer for AM is null as it - // itself is the master container. - RMAppAttempt appAttempt = - rmContext - .getRMApps() - .get( - container.getId().getApplicationAttemptId() - .getApplicationId()).getCurrentAppAttempt(); - if (appAttempt.getMasterContainer() == null - && appAttempt.getSubmissionContext().getUnmanagedAM() == false) { - containerType = ContainerType.APPLICATION_MASTER; - } - try { - // create container token and NMToken altogether. - container.setContainerToken(rmContext.getContainerTokenSecretManager() - .createContainerToken(container.getId(), container.getNodeId(), - getUser(), container.getResource(), container.getPriority(), - rmContainer.getCreationTime(), this.logAggregationContext, - rmContainer.getNodeLabelExpression(), containerType)); - NMToken nmToken = - rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), - getApplicationAttemptId(), container); - if (nmToken != null) { - nmTokens.add(nmToken); - } - } catch (IllegalArgumentException e) { - // DNS might be down, skip returning this container. - LOG.error("Error trying to assign container token and NM token to" + - " an allocated container " + container.getId(), e); - continue; + Container updatedContainer = updateContainerAndNMToken(rmContainer, true); + // Only add container to return list when it's not null. updatedContainer + // could be null when generate token failed, it can be caused by DNS + // resolving failed. 
+ if (updatedContainer != null) { + returnContainerList.add(updatedContainer); } - returnContainerList.add(container); i.remove(); - rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), - RMContainerEventType.ACQUIRED)); } - return new ContainersAndNMTokensAllocation(returnContainerList, nmTokens); + return returnContainerList; + } + + private synchronized List pullNewlyUpdatedContainers( + Map updatedContainerMap) { + List returnContainerList = + new ArrayList(updatedContainerMap.size()); + for (Iterator> i = + updatedContainerMap.entrySet().iterator(); i.hasNext();) { + RMContainer rmContainer = i.next().getValue(); + Container updatedContainer = updateContainerAndNMToken(rmContainer, false); + returnContainerList.add(updatedContainer); + i.remove(); + } + return returnContainerList; + } + + public synchronized List pullNewlyIncreasedContainers() { + return pullNewlyUpdatedContainers(newlyIncreasedContainers); + } + + public synchronized List pullNewlyDecreasedContainers() { + return pullNewlyUpdatedContainers(newlyDecreasedContainers); + } + + public synchronized List pullUpdatedNMTokens() { + List returnList = new ArrayList(updatedNMTokens); + updatedNMTokens.clear(); + return returnList; } public synchronized void updateBlacklist( @@ -757,4 +833,65 @@ public synchronized ResourceUsage getSchedulingResourceUsage() { return attemptResourceUsage; } -} + public synchronized boolean removeIncreaseRequest(NodeId nodeId, + Priority priority, ContainerId containerId) { + return appSchedulingInfo.removeIncreaseRequest(nodeId, priority, + containerId); + } + + public synchronized boolean updateIncreaseRequests( + List increaseRequests) { + return appSchedulingInfo.updateIncreaseRequests(increaseRequests); + } + + public synchronized Map + getIncreaseRequests(NodeId nodeId, Priority priority) { + Map map = + appSchedulingInfo.getIncreaseRequests(nodeId, priority); + if (null == map) { + return Collections.emptyMap(); + } + return map; + } + + public 
synchronized boolean hasIncreaseRequestForContainer(NodeId nodeId, + Priority priority, ContainerId containerId) { + return getIncreaseRequests(nodeId, priority).containsKey(containerId); + } + + private synchronized void changeContainerResource( + SchedulerContainerResourceChangeRequest changeRequest, boolean increase) { + if (increase) { + appSchedulingInfo.increaseContainer(changeRequest); + } else { + appSchedulingInfo.decreaseContainer(changeRequest); + } + + RMContainer changedRMContainer = changeRequest.getRMContainer(); + changedRMContainer.handle( + new RMContainerChangeResourceEvent(changeRequest.getContainerId(), + changeRequest.getTargetCapacity(), increase)); + + // remove pending and not pulled by AM newly-increased/decreased-containers + // and add the new one + if (increase) { + newlyDecreasedContainers.remove(changeRequest.getContainerId()); + newlyIncreasedContainers.put(changeRequest.getContainerId(), + changedRMContainer); + } else { + newlyIncreasedContainers.remove(changeRequest.getContainerId()); + newlyDecreasedContainers.put(changeRequest.getContainerId(), + changedRMContainer); + } + } + + public synchronized void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest) { + changeContainerResource(decreaseRequest, false); + } + + public synchronized void increaseContainer( + SchedulerContainerResourceChangeRequest increaseRequest) { + changeContainerResource(increaseRequest, true); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerContainerResourceChangeRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerContainerResourceChangeRequest.java new file mode 100644 index 0000000..c65ad46 --- /dev/null +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerContainerResourceChangeRequest.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.util.resource.Resources; + +/** + * This is ContainerResourceChangeRequest in scheduler side, it contains some + * pointers to runtime objects like RMContainer, SchedulerNode, etc. This will + * be easier for scheduler making decision. 
+ */ +public class SchedulerContainerResourceChangeRequest implements + Comparable { + RMContainer rmContainer; + Resource targetCapacity; + SchedulerNode schedulerNode; + Resource deltaCapacity; + + public SchedulerContainerResourceChangeRequest(SchedulerNode schedulerNode, + RMContainer rmContainer, Resource targetCapacity) { + this.rmContainer = rmContainer; + this.targetCapacity = targetCapacity; + this.schedulerNode = schedulerNode; + deltaCapacity = Resources.subtract(targetCapacity, + rmContainer.getAllocatedResource()); + } + + public NodeId getNodeId() { + return this.rmContainer.getAllocatedNode(); + } + + public RMContainer getRMContainer() { + return this.rmContainer; + } + + public Resource getTargetCapacity() { + return this.targetCapacity; + } + + /** + * Delta capacity = before - target, so if it is a decrease request, delta + * capacity will be negative + */ + public Resource getDeltaCapacity() { + return deltaCapacity; + } + + public Priority getPriority() { + return rmContainer.getContainer().getPriority(); + } + + public ContainerId getContainerId() { + return rmContainer.getContainerId(); + } + + public String getNodePartition() { + return schedulerNode.getPartition(); + } + + @Override + public int hashCode() { + return (getContainerId().hashCode() << 16) + targetCapacity.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof SchedulerContainerResourceChangeRequest)) { + return false; + } + return compareTo((SchedulerContainerResourceChangeRequest)other) == 0; + } + + @Override + public int compareTo(SchedulerContainerResourceChangeRequest other) { + if (other == null) { + return -1; + } + + int rc = getPriority().compareTo(other.getPriority()); + if (0 != rc) { + return rc; + } + + return getContainerId().compareTo(other.getContainerId()); + } + + @Override + public String toString() { + return ""; + } +} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index f03663a..f3d3906 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -157,6 +157,37 @@ public synchronized void allocateContainer(RMContainer rmContainer) { + getUsedResource() + " used and " + getAvailableResource() + " available after allocation"); } + + private synchronized void changeContainerResource(ContainerId containerId, + Resource deltaResource, boolean increase) { + if (increase) { + deductAvailableResource(deltaResource); + } else { + addAvailableResource(deltaResource); + } + + LOG.info((increase ? "Increased" : "Decreased") + " container " + + containerId + " of capacity " + deltaResource + " on host " + + rmNode.getNodeAddress() + ", which has " + numContainers + + " containers, " + getUsedResource() + " used and " + + getAvailableResource() + " available after allocation"); + } + + /** + * The Scheduler increased container + */ + public synchronized void increaseContainer(ContainerId containerId, + Resource deltaResource) { + changeContainerResource(containerId, deltaResource, true); + } + + /** + * The Scheduler decreased container + */ + public synchronized void decreaseContainer(ContainerId containerId, + Resource deltaResource) { + changeContainerResource(containerId, deltaResource, false); + } /** * Get available resources on the node. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java index 8047d0b..abefee8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java @@ -361,7 +361,7 @@ public static AccessType toAccessType(QueueACL acl) { } public static boolean checkResourceRequestMatchingNodePartition( - ResourceRequest offswitchResourceRequest, String nodePartition, + String requestedPartition, String nodePartition, SchedulingMode schedulingMode) { // We will only look at node label = nodeLabelToLookAt according to // schedulingMode and partition of node. 
@@ -371,12 +371,11 @@ public static boolean checkResourceRequestMatchingNodePartition( } else { nodePartitionToLookAt = RMNodeLabelsManager.NO_LABEL; } - - String askedNodePartition = offswitchResourceRequest.getNodeLabelExpression(); - if (null == askedNodePartition) { - askedNodePartition = RMNodeLabelsManager.NO_LABEL; + + if (null == requestedPartition) { + requestedPartition = RMNodeLabelsManager.NO_LABEL; } - return askedNodePartition.equals(nodePartitionToLookAt); + return requestedPartition.equals(nodePartitionToLookAt); } private static boolean hasPendingResourceRequest(ResourceCalculator rc, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 4fd16c0..d969b3f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -132,16 +133,17 @@ public QueueInfo getQueueInfo(String queueName, boolean includeChildQueues, * @param release * @param blacklistAdditions * @param blacklistRemovals + * @param increaseRequests + * 
@param decreaseRequests * @return the {@link Allocation} for the application */ @Public @Stable - Allocation - allocate(ApplicationAttemptId appAttemptId, - List ask, - List release, - List blacklistAdditions, - List blacklistRemovals); + Allocation allocate(ApplicationAttemptId appAttemptId, + List ask, List release, + List blacklistAdditions, List blacklistRemovals, + List increaseRequests, + List decreaseRequests); /** * Get node resource usage report. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 792c25c..ca1dada 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -43,9 +43,10 @@ import org.apache.hadoop.yarn.security.PrivilegedEntity.EntityType; import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -75,7 +76,7 @@ private boolean preemptionDisabled; // Track resource usage-by-label like used-resource/pending-resource, etc. - ResourceUsage queueUsage; + volatile ResourceUsage queueUsage; // Track capacities like used-capcity/abs-used-capacity/capacity/abs-capacity, // etc. @@ -339,22 +340,27 @@ public Resource getMinimumAllocation() { return minimumAllocation; } - synchronized void allocateResource(Resource clusterResource, - Resource resource, String nodePartition) { + synchronized void allocateResource(Resource clusterResource, + Resource resource, String nodePartition, boolean changeContainerResource) { queueUsage.incUsed(nodePartition, resource); - ++numContainers; + if (!changeContainerResource) { + ++numContainers; + } CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, minimumAllocation, this, labelManager, nodePartition); } protected synchronized void releaseResource(Resource clusterResource, - Resource resource, String nodePartition) { + Resource resource, String nodePartition, boolean changeContainerResource) { queueUsage.decUsed(nodePartition, resource); CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, minimumAllocation, this, labelManager, nodePartition); - --numContainers; + + if (!changeContainerResource) { + --numContainers; + } } @Private @@ -445,8 +451,8 @@ private Resource getCurrentLimitResource(String nodePartition, } synchronized boolean canAssignToThisQueue(Resource clusterResource, - String nodePartition, ResourceLimits currentResourceLimits, Resource resourceCouldBeUnreserved, - SchedulingMode schedulingMode) { + String nodePartition, ResourceLimits currentResourceLimits, + Resource resourceCouldBeUnreserved, SchedulingMode schedulingMode) { // Get current limited resource: // - When doing 
RESPECT_PARTITION_EXCLUSIVITY allocation, we will respect // queues' max capacity. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java index 928437f..68f6f12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java @@ -41,6 +41,7 @@ private final boolean skipped; private boolean fulfilledReservation; private final AssignmentInformation assignmentInformation; + private boolean increaseAllocation; public CSAssignment(Resource resource, NodeType type) { this(resource, type, null, null, false, false); @@ -138,4 +139,12 @@ public boolean isFulfilledReservation() { public AssignmentInformation getAssignmentInformation() { return this.assignmentInformation; } + + public boolean isIncreasedAllocation() { + return increaseAllocation; + } + + public void setIncreasedAllocation(boolean flag) { + increaseAllocation = flag; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java index b06a646..c309bca 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java @@ -37,6 +37,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -218,6 +220,14 @@ public void completedContainer(Resource clusterResource, boolean sortQueues); /** + * We have a reserved increased container in the queue, we need to unreserve + * it. Since we just want to cancel the reserved increase request instead of + * stopping the container, we shouldn't call completedContainer for that purpose. + */ + public void unreserveContainerIncreaseRequest(Resource clusterResource, + FiCaSchedulerApp app, FiCaSchedulerNode node, RMContainer rmContainer); + + /** * Get the number of applications in the queue.
* @return number of applications */ @@ -287,4 +297,11 @@ public void attachContainer(Resource clusterResource, * @return resourceUsage */ public ResourceUsage getQueueResourceUsage(); + + /** + * Decrease container resource in the queue + */ + public void decreaseContainer(Resource clusterResource, + SchedulerContainerResourceChangeRequest decreaseRequest, + FiCaSchedulerApp app); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 6a3e4c0..a64bab9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; @@ -96,6 +97,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueNotFoundException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; @@ -890,8 +893,10 @@ private synchronized void doneApplicationAttempt( @Override @Lock(Lock.NoLock.class) public Allocation allocate(ApplicationAttemptId applicationAttemptId, - List ask, List release, - List blacklistAdditions, List blacklistRemovals) { + List ask, List release, + List blacklistAdditions, List blacklistRemovals, + List increaseRequests, + List decreaseRequests) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { @@ -902,6 +907,11 @@ public Allocation allocate(ApplicationAttemptId applicationAttemptId, SchedulerUtils.normalizeRequests( ask, getResourceCalculator(), getClusterResource(), getMinimumResourceCapability(), getMaximumResourceCapability()); + + // Pre-process increase requests + List normalizedIncreaseRequests = + checkAndNormalizeContainerChangeRequests(application, increaseRequests, + true); // Release containers releaseContainers(release, application); @@ -918,8 +928,8 @@ public Allocation allocate(ApplicationAttemptId applicationAttemptId, return EMPTY_ALLOCATION; } + // Process resource requests if (!ask.isEmpty()) { - if(LOG.isDebugEnabled()) { LOG.debug("allocate: pre-update " + applicationAttemptId + " ask size =" + ask.size()); @@ -936,8 +946,17 @@ public Allocation allocate(ApplicationAttemptId applicationAttemptId, application.showRequests(); } } + + // Process increase resource requests + if (application.updateIncreaseRequests(normalizedIncreaseRequests) + && (updateDemandForQueue == null)) { + updateDemandForQueue = (LeafQueue) application.getQueue(); + } application.updateBlacklist(blacklistAdditions, blacklistRemovals); + + // 
Decrease containers + decreaseContainers(decreaseRequests, application); allocation = application.getAllocation(getResourceCalculator(), clusterResource, getMinimumResourceCapability()); @@ -999,6 +1018,15 @@ private synchronized void nodeUpdate(RMNode nm) { for (ContainerStatus launchedContainer : newlyLaunchedContainers) { containerLaunchedOnNode(launchedContainer.getContainerId(), node); } + + // Processing the newly increased containers + List newlyIncreasedContainers = + nm.pullNewlyIncreasedContainers(); + if (null != newlyIncreasedContainers) { + for (Container container : newlyIncreasedContainers) { + containerIncreasedOnNode(container.getId(), node, container); + } + } // Process completed containers int releasedContainers = 0; @@ -1476,6 +1504,45 @@ protected synchronized void completedContainer(RMContainer rmContainer, container.getId(), queue.getQueuePath()); } } + + @Lock(CapacityScheduler.class) + @Override + protected synchronized void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + RMContainer rmContainer = decreaseRequest.getRMContainer(); + + // Check container status before doing decrease + if (rmContainer.getState() != RMContainerState.RUNNING) { + LOG.info("Trying to decrease a container not in RUNNING state, container=" + + rmContainer + " state=" + rmContainer.getState().name()); + return; + } + + // Delta capacity of this decrease request is 0, this decrease request may + // just be to cancel an increase request + if (Resources.equals(decreaseRequest.getDeltaCapacity(), Resources.none())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Decrease target resource equals to existing resource for container:" + + decreaseRequest.getContainerId() + + " ignore this decrease request."); + } + return; + } + + // Save resource before decrease + Resource resourceBeforeDecrease = + Resources.clone(rmContainer.getContainer().getResource()); + + FiCaSchedulerApp app =
(FiCaSchedulerApp)attempt; + LeafQueue queue = (LeafQueue) attempt.getQueue(); + queue.decreaseContainer(clusterResource, decreaseRequest, app); + + LOG.info("Application attempt " + app.getApplicationAttemptId() + + " decreased container:" + decreaseRequest.getContainerId() + " from " + + resourceBeforeDecrease + " to " + + decreaseRequest.getTargetCapacity()); + } @Lock(Lock.NoLock.class) @VisibleForTesting diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index ff1baff..81cd8cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -60,10 +59,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FifoOrderingPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.utils.Lock; @@ -730,17 +729,22 @@ private synchronized FiCaSchedulerApp getApplication( } private void handleExcessReservedContainer(Resource clusterResource, - CSAssignment assignment) { + CSAssignment assignment, FiCaSchedulerNode node, FiCaSchedulerApp app) { if (assignment.getExcessReservation() != null) { RMContainer excessReservedContainer = assignment.getExcessReservation(); - - completedContainer(clusterResource, assignment.getApplication(), - scheduler.getNode(excessReservedContainer.getAllocatedNode()), - excessReservedContainer, - SchedulerUtils.createAbnormalContainerStatus( - excessReservedContainer.getContainerId(), - SchedulerUtils.UNRESERVED_CONTAINER), - RMContainerEventType.RELEASED, null, false); + + if (excessReservedContainer.hasIncreaseReservation()) { + unreserveContainerIncreaseRequest(clusterResource, + app, node, excessReservedContainer); + } else { + completedContainer(clusterResource, assignment.getApplication(), + scheduler.getNode(excessReservedContainer.getAllocatedNode()), + excessReservedContainer, + SchedulerUtils.createAbnormalContainerStatus( + excessReservedContainer.getContainerId(), + SchedulerUtils.UNRESERVED_CONTAINER), + RMContainerEventType.RELEASED, null, false); + } assignment.setExcessReservation(null); } @@ -766,7 +770,8 @@ public synchronized CSAssignment assignContainers(Resource clusterResource, CSAssignment assignment = application.assignContainers(clusterResource, node, currentResourceLimits, schedulingMode, reservedContainer); - handleExcessReservedContainer(clusterResource, assignment); + handleExcessReservedContainer(clusterResource, assignment, node, + 
application); return assignment; } } @@ -824,7 +829,8 @@ public synchronized CSAssignment assignContainers(Resource clusterResource, // Did we schedule or reserve a container? Resource assigned = assignment.getResource(); - handleExcessReservedContainer(clusterResource, assignment); + handleExcessReservedContainer(clusterResource, assignment, node, + application); if (Resources.greaterThan(resourceCalculator, clusterResource, assigned, Resources.none())) { @@ -836,7 +842,8 @@ public synchronized CSAssignment assignContainers(Resource clusterResource, // Book-keeping // Note: Update headroom to account for current allocation too... allocateResource(clusterResource, application, assigned, - node.getPartition(), reservedOrAllocatedRMContainer); + node.getPartition(), reservedOrAllocatedRMContainer, + assignment.isIncreasedAllocation()); // Done return assignment; @@ -1086,6 +1093,37 @@ protected synchronized boolean canAssignToUser(Resource clusterResource, } return true; } + + @Override + public void unreserveContainerIncreaseRequest(Resource clusterResource, + FiCaSchedulerApp app, FiCaSchedulerNode node, RMContainer rmContainer) { + boolean removed = false; + Priority priority = null; + + synchronized (this) { + if (rmContainer.getContainer() != null) { + priority = rmContainer.getContainer().getPriority(); + } + + if (null != priority) { + removed = app.unreserve(rmContainer.getContainer().getPriority(), node, + rmContainer); + } + + if (removed) { + // Inform the ordering policy + orderingPolicy.containerReleased(app, rmContainer); + + releaseResource(clusterResource, app, rmContainer.getReservedResource(), + node.getPartition(), rmContainer, true); + } + } + + if (removed) { + getParent().unreserveContainerIncreaseRequest(clusterResource, app, node, + rmContainer); + } + } @Override public void completedContainer(Resource clusterResource, @@ -1093,6 +1131,15 @@ public void completedContainer(Resource clusterResource, ContainerStatus containerStatus, 
RMContainerEventType event, CSQueue childQueue, boolean sortQueues) { if (application != null) { + // unreserve container increase request if it previously reserved. + if (rmContainer.hasIncreaseReservation()) { + unreserveContainerIncreaseRequest(clusterResource, application, node, + rmContainer); + } + + // Remove container increase request if it exists + application.removeIncreaseRequest(node.getNodeID(), + rmContainer.getAllocatedPriority(), rmContainer.getContainerId()); boolean removed = false; @@ -1123,7 +1170,7 @@ public void completedContainer(Resource clusterResource, orderingPolicy.containerReleased(application, rmContainer); releaseResource(clusterResource, application, container.getResource(), - node.getPartition(), rmContainer); + node.getPartition(), rmContainer, false); } } @@ -1137,8 +1184,10 @@ public void completedContainer(Resource clusterResource, synchronized void allocateResource(Resource clusterResource, SchedulerApplicationAttempt application, Resource resource, - String nodePartition, RMContainer rmContainer) { - super.allocateResource(clusterResource, resource, nodePartition); + String nodePartition, RMContainer rmContainer, + boolean isIncreasedAllocation) { + super.allocateResource(clusterResource, resource, nodePartition, + isIncreasedAllocation); // handle ignore exclusivity container if (null != rmContainer && rmContainer.getNodeLabelExpression().equals( @@ -1174,8 +1223,9 @@ synchronized void allocateResource(Resource clusterResource, synchronized void releaseResource(Resource clusterResource, FiCaSchedulerApp application, Resource resource, String nodePartition, - RMContainer rmContainer) { - super.releaseResource(clusterResource, resource, nodePartition); + RMContainer rmContainer, boolean isChangeResource) { + super.releaseResource(clusterResource, resource, nodePartition, + isChangeResource); // handle ignore exclusivity container if (null != rmContainer && rmContainer.getNodeLabelExpression().equals( @@ -1347,7 +1397,7 @@ 
public void recoverContainer(Resource clusterResource, FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); allocateResource(clusterResource, attempt, rmContainer.getContainer() - .getResource(), node.getPartition(), rmContainer); + .getResource(), node.getPartition(), rmContainer, false); } getParent().recoverContainer(clusterResource, attempt, rmContainer); } @@ -1396,7 +1446,7 @@ public void attachContainer(Resource clusterResource, FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); allocateResource(clusterResource, application, rmContainer.getContainer() - .getResource(), node.getPartition(), rmContainer); + .getResource(), node.getPartition(), rmContainer, false); LOG.info("movedContainer" + " container=" + rmContainer.getContainer() + " resource=" + rmContainer.getContainer().getResource() + " queueMoveIn=" + this + " usedCapacity=" + getUsedCapacity() @@ -1414,7 +1464,7 @@ public void detachContainer(Resource clusterResource, FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); releaseResource(clusterResource, application, rmContainer.getContainer() - .getResource(), node.getPartition(), rmContainer); + .getResource(), node.getPartition(), rmContainer, false); LOG.info("movedContainer" + " container=" + rmContainer.getContainer() + " resource=" + rmContainer.getContainer().getResource() + " queueMoveOut=" + this + " usedCapacity=" + getUsedCapacity() @@ -1466,6 +1516,39 @@ public synchronized void setOrderingPolicy( public Priority getDefaultApplicationPriority() { return defaultAppPriorityPerQueue; } + + @Override + public void decreaseContainer(Resource clusterResource, + SchedulerContainerResourceChangeRequest decreaseRequest, + FiCaSchedulerApp app) { + // If the container being decreased is reserved, we need to unreserve it + // first. 
+ RMContainer rmContainer = decreaseRequest.getRMContainer(); + if (rmContainer.hasIncreaseReservation()) { + unreserveContainerIncreaseRequest(clusterResource, app, + csContext.getNode(decreaseRequest.getNodeId()), rmContainer); + } + + // Delta capacity is negative when it's a decrease request + Resource absDelta = Resources.negate(decreaseRequest.getDeltaCapacity()); + + synchronized (this) { + // Delta is negative when it's a decrease request + releaseResource(clusterResource, app, absDelta, + decreaseRequest.getNodePartition(), decreaseRequest.getRMContainer(), + true); + // Notify application + app.decreaseContainer(decreaseRequest); + // Notify node + csContext.getNode(decreaseRequest.getNodeId()) + .decreaseContainer(decreaseRequest.getContainerId(), absDelta); + } + + // Notify parent + if (getParent() != null) { + getParent().decreaseContainer(clusterResource, decreaseRequest, app); + } + } public synchronized OrderingPolicy getPendingAppsOrderingPolicy() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index e01204cc..38836e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.util.resource.Resources; @@ -430,7 +431,7 @@ public synchronized CSAssignment assignContainers(Resource clusterResource, assignedToChild.getResource(), Resources.none())) { // Track resource utilization for the parent-queue super.allocateResource(clusterResource, assignedToChild.getResource(), - node.getPartition()); + node.getPartition(), assignedToChild.isIncreasedAllocation()); // Track resource utilization in this pass of the scheduler Resources @@ -454,6 +455,8 @@ public synchronized CSAssignment assignContainers(Resource clusterResource, .addAll( assignedToChild.getAssignmentInformation() .getReservationDetails()); + assignment.setIncreasedAllocation(assignedToChild + .isIncreasedAllocation()); LOG.info("assignedContainer" + " queue=" + getQueueName() + @@ -616,6 +619,77 @@ private void printChildQueues() { } } + private synchronized void internalReleaseResource(Resource clusterResource, + FiCaSchedulerApp application, FiCaSchedulerNode node, + Resource releasedResource, boolean changeResource, + CSQueue completedChildQueue, boolean sortQueues) { + super.releaseResource(clusterResource, + releasedResource, node.getPartition(), + changeResource); + + if (LOG.isDebugEnabled()) { + LOG.debug("completedContainer " + this + ", cluster=" + clusterResource); + } + + // Note that this is using an iterator on the childQueues so this can't + // be called if already within an iterator for the childQueues. Like + // from assignContainersToChildQueues. 
+ if (sortQueues) { + // reinsert the updated queue + for (Iterator iter = childQueues.iterator(); iter.hasNext();) { + CSQueue csqueue = iter.next(); + if (csqueue.equals(completedChildQueue)) { + iter.remove(); + if (LOG.isDebugEnabled()) { + LOG.debug("Re-sorting completed queue: " + csqueue); + } + childQueues.add(csqueue); + break; + } + } + } + + // If we skipped sort queue this time, we need to resort queues to make + // sure we allocate from least usage (or order defined by queue policy) + // queues. + needToResortQueuesAtNextAllocation = !sortQueues; + } + + @Override + public void decreaseContainer(Resource clusterResource, + SchedulerContainerResourceChangeRequest decreaseRequest, + FiCaSchedulerApp app) { + if (app != null) { + // delta capacity is negative when it's a decrease request + Resource absDeltaCapacity = + Resources.negate(decreaseRequest.getDeltaCapacity()); + + internalReleaseResource(clusterResource, app, + csContext.getNode(decreaseRequest.getNodeId()), + absDeltaCapacity, false, null, false); + + // Inform the parent + if (parent != null) { + parent.decreaseContainer(clusterResource, decreaseRequest, app); + } + } + } + + @Override + public void unreserveContainerIncreaseRequest(Resource clusterResource, + FiCaSchedulerApp app, FiCaSchedulerNode node, RMContainer rmContainer) { + if (app != null) { + internalReleaseResource(clusterResource, app, node, + rmContainer.getReservedResource(), false, null, false); + + // Inform the parent + if (parent != null) { + parent.unreserveContainerIncreaseRequest(clusterResource, app, node, + rmContainer); + } + } + } + @Override public void completedContainer(Resource clusterResource, FiCaSchedulerApp application, FiCaSchedulerNode node, @@ -623,40 +697,9 @@ public void completedContainer(Resource clusterResource, RMContainerEventType event, CSQueue completedChildQueue, boolean sortQueues) { if (application != null) { - // Careful! Locking order is important! 
- // Book keeping - synchronized (this) { - super.releaseResource(clusterResource, rmContainer.getContainer() - .getResource(), node.getPartition()); - - if (LOG.isDebugEnabled()) { - LOG.debug("completedContainer " + this + ", cluster=" + clusterResource); - } - - // Note that this is using an iterator on the childQueues so this can't - // be called if already within an iterator for the childQueues. Like - // from assignContainersToChildQueues. - if (sortQueues) { - // reinsert the updated queue - for (Iterator iter = childQueues.iterator(); - iter.hasNext();) { - CSQueue csqueue = iter.next(); - if(csqueue.equals(completedChildQueue)) { - iter.remove(); - if (LOG.isDebugEnabled()) { - LOG.debug("Re-sorting completed queue: " + csqueue); - } - childQueues.add(csqueue); - break; - } - } - } - - // If we skipped sort queue this time, we need to resort queues to make - // sure we allocate from least usage (or order defined by queue policy) - // queues. - needToResortQueuesAtNextAllocation = !sortQueues; - } + internalReleaseResource(clusterResource, application, node, + rmContainer.getContainer().getResource(), false, completedChildQueue, + sortQueues); // Inform the parent if (parent != null) { @@ -698,7 +741,7 @@ public void recoverContainer(Resource clusterResource, FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); super.allocateResource(clusterResource, rmContainer.getContainer() - .getResource(), node.getPartition()); + .getResource(), node.getPartition(), false); } if (parent != null) { parent.recoverContainer(clusterResource, attempt, rmContainer); @@ -726,7 +769,7 @@ public void attachContainer(Resource clusterResource, FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); super.allocateResource(clusterResource, rmContainer.getContainer() - .getResource(), node.getPartition()); + .getResource(), node.getPartition(), false); LOG.info("movedContainer" + " queueMoveIn=" + getQueueName() + " 
usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed() + " cluster=" @@ -746,7 +789,7 @@ public void detachContainer(Resource clusterResource, scheduler.getNode(rmContainer.getContainer().getNodeId()); super.releaseResource(clusterResource, rmContainer.getContainer().getResource(), - node.getPartition()); + node.getPartition(), false); LOG.info("movedContainer" + " queueMoveOut=" + getQueueName() + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed() + " cluster=" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java new file mode 100644 index 0000000..b986b1f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; + +/** + * For an application, resource limits and resource requests, decide how to + * allocate container. This is to make application resource allocation logic + * extensible. 
+ */ +public abstract class AbstractContainerAllocator { + private static final Log LOG = LogFactory.getLog(AbstractContainerAllocator.class); + + FiCaSchedulerApp application; + final ResourceCalculator rc; + final RMContext rmContext; + + public AbstractContainerAllocator(FiCaSchedulerApp application, + ResourceCalculator rc, RMContext rmContext) { + this.application = application; + this.rc = rc; + this.rmContext = rmContext; + } + + protected CSAssignment getCSAssignmentFromAllocateResult( + Resource clusterResource, ContainerAllocation result, + RMContainer rmContainer) { + // Handle skipped + boolean skipped = + (result.getAllocationState() == AllocationState.APP_SKIPPED); + CSAssignment assignment = new CSAssignment(skipped); + assignment.setApplication(application); + + // Handle excess reservation + assignment.setExcessReservation(result.getContainerToBeUnreserved()); + + // If we allocated something + if (Resources.greaterThan(rc, clusterResource, + result.getResourceToBeAllocated(), Resources.none())) { + Resource allocatedResource = result.getResourceToBeAllocated(); + Container updatedContainer = result.getUpdatedContainer(); + + assignment.setResource(allocatedResource); + assignment.setType(result.getContainerNodeType()); + + if (result.getAllocationState() == AllocationState.RESERVED) { + // This is a reserved container + LOG.info("Reserved container " + " application=" + + application.getApplicationId() + " resource=" + allocatedResource + + " queue=" + this.toString() + " cluster=" + clusterResource); + assignment.getAssignmentInformation().addReservationDetails( + updatedContainer.getId(), + application.getCSLeafQueue().getQueuePath()); + assignment.getAssignmentInformation().incrReservations(); + Resources.addTo(assignment.getAssignmentInformation().getReserved(), + allocatedResource); + } else if (result.getAllocationState() == AllocationState.ALLOCATED){ + // This is a new container + // Inform the ordering policy + 
LOG.info("assignedContainer" + " application attempt=" + + application.getApplicationAttemptId() + " container=" + + updatedContainer.getId() + " queue=" + this + " clusterResource=" + + clusterResource); + + application + .getCSLeafQueue() + .getOrderingPolicy() + .containerAllocated(application, + application.getRMContainer(updatedContainer.getId())); + + assignment.getAssignmentInformation().addAllocationDetails( + updatedContainer.getId(), + application.getCSLeafQueue().getQueuePath()); + assignment.getAssignmentInformation().incrAllocations(); + Resources.addTo(assignment.getAssignmentInformation().getAllocated(), + allocatedResource); + + if (rmContainer != null) { + assignment.setFulfilledReservation(true); + } + } + } + + return assignment; + } + + /** + * allocate needs to handle following stuffs: + * + *
    + *
  • Select request: Select a request to allocate. E.g. select a resource + * request based on requirement/priority/locality.
  • + *
  • Check if a given resource can be allocated based on resource + * availability
  • + *
  • Do allocation: this will decide/create allocated/reserved + * container, this will also update metrics
  • + *
+ */ + public abstract CSAssignment assignContainers(Resource clusterResource, + FiCaSchedulerNode node, SchedulingMode schedulingMode, + ResourceLimits resourceLimits, RMContainer reservedContainer); +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java index 6e296cd..3be8e0e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java @@ -18,13 +18,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; @@ -33,118 +30,50 @@ import 
org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; -/** - * For an application, resource limits and resource requests, decide how to - * allocate container. This is to make application resource allocation logic - * extensible. - */ -public abstract class ContainerAllocator { - private static final Log LOG = LogFactory.getLog(ContainerAllocator.class); +public class ContainerAllocator extends AbstractContainerAllocator { + AbstractContainerAllocator increaseContainerAllocator; + AbstractContainerAllocator regularContainerAllocator; - FiCaSchedulerApp application; - final ResourceCalculator rc; - final RMContext rmContext; - public ContainerAllocator(FiCaSchedulerApp application, ResourceCalculator rc, RMContext rmContext) { - this.application = application; - this.rc = rc; - this.rmContext = rmContext; - } + super(application, rc, rmContext); - protected boolean checkHeadroom(Resource clusterResource, - ResourceLimits currentResourceLimits, Resource required, - FiCaSchedulerNode node) { - // If headroom + currentReservation < required, we cannot allocate this - // require - Resource resourceCouldBeUnReserved = application.getCurrentReservation(); - if (!application.getCSLeafQueue().getReservationContinueLooking() - || !node.getPartition().equals(RMNodeLabelsManager.NO_LABEL)) { - // If we don't allow reservation continuous looking, OR we're looking at - // non-default node partition, we won't allow to unreserve before - // allocation. 
- resourceCouldBeUnReserved = Resources.none(); - } - return Resources.greaterThanOrEqual(rc, clusterResource, Resources.add( - currentResourceLimits.getHeadroom(), resourceCouldBeUnReserved), - required); + increaseContainerAllocator = + new IncreaseContainerAllocator(application, rc, rmContext); + regularContainerAllocator = + new RegularContainerAllocator(application, rc, rmContext); } - protected CSAssignment getCSAssignmentFromAllocateResult( - Resource clusterResource, ContainerAllocation result, - RMContainer rmContainer) { - // Handle skipped - boolean skipped = - (result.getAllocationState() == AllocationState.APP_SKIPPED); - CSAssignment assignment = new CSAssignment(skipped); - assignment.setApplication(application); - - // Handle excess reservation - assignment.setExcessReservation(result.getContainerToBeUnreserved()); - - // If we allocated something - if (Resources.greaterThan(rc, clusterResource, - result.getResourceToBeAllocated(), Resources.none())) { - Resource allocatedResource = result.getResourceToBeAllocated(); - Container updatedContainer = result.getUpdatedContainer(); - - assignment.setResource(allocatedResource); - assignment.setType(result.getContainerNodeType()); - - if (result.getAllocationState() == AllocationState.RESERVED) { - // This is a reserved container - LOG.info("Reserved container " + " application=" - + application.getApplicationId() + " resource=" + allocatedResource - + " queue=" + this.toString() + " cluster=" + clusterResource); - assignment.getAssignmentInformation().addReservationDetails( - updatedContainer.getId(), - application.getCSLeafQueue().getQueuePath()); - assignment.getAssignmentInformation().incrReservations(); - Resources.addTo(assignment.getAssignmentInformation().getReserved(), - allocatedResource); - } else if (result.getAllocationState() == AllocationState.ALLOCATED){ - // This is a new container - // Inform the ordering policy - LOG.info("assignedContainer" + " application attempt=" - + 
application.getApplicationAttemptId() + " container=" - + updatedContainer.getId() + " queue=" + this + " clusterResource=" - + clusterResource); - - application - .getCSLeafQueue() - .getOrderingPolicy() - .containerAllocated(application, - application.getRMContainer(updatedContainer.getId())); - - assignment.getAssignmentInformation().addAllocationDetails( - updatedContainer.getId(), - application.getCSLeafQueue().getQueuePath()); - assignment.getAssignmentInformation().incrAllocations(); - Resources.addTo(assignment.getAssignmentInformation().getAllocated(), - allocatedResource); - - if (rmContainer != null) { - assignment.setFulfilledReservation(true); - } + @Override + public CSAssignment assignContainers(Resource clusterResource, + FiCaSchedulerNode node, SchedulingMode schedulingMode, + ResourceLimits resourceLimits, RMContainer reservedContainer) { + if (reservedContainer != null) { + if (reservedContainer.getState() == RMContainerState.RESERVED) { + // It's a regular container + return regularContainerAllocator.assignContainers(clusterResource, + node, schedulingMode, resourceLimits, reservedContainer); + } else { + // It's a increase container + return increaseContainerAllocator.assignContainers(clusterResource, + node, schedulingMode, resourceLimits, reservedContainer); + } + } else { + /* + * Try to allocate increase container first, and if we failed to allocate + * anything, we will try to allocate regular container + */ + CSAssignment assign = + increaseContainerAllocator.assignContainers(clusterResource, node, + schedulingMode, resourceLimits, null); + if (Resources.greaterThan(rc, clusterResource, assign.getResource(), + Resources.none())) { + return assign; } + + return regularContainerAllocator.assignContainers(clusterResource, node, + schedulingMode, resourceLimits, null); } - - return assignment; } - - /** - * allocate needs to handle following stuffs: - * - *
    - *
  • Select request: Select a request to allocate. E.g. select a resource - * request based on requirement/priority/locality.
  • - *
  • Check if a given resource can be allocated based on resource - * availability
  • - *
  • Do allocation: this will decide/create allocated/reserved - * container, this will also update metrics
  • - *
- */ - public abstract CSAssignment assignContainers(Resource clusterResource, - FiCaSchedulerNode node, SchedulingMode schedulingMode, - ResourceLimits resourceLimits, RMContainer reservedContainer); -} \ No newline at end of file + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java new file mode 100644 index 0000000..87ec741 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java @@ -0,0 +1,373 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AppSchedulingInfo; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; + +public class IncreaseContainerAllocator extends AbstractContainerAllocator { + private static final Log LOG = + LogFactory.getLog(IncreaseContainerAllocator.class); + + public IncreaseContainerAllocator(FiCaSchedulerApp application, + ResourceCalculator rc, RMContext rmContext) { + super(application, rc, rmContext); + } + + /** + * Quick check if we can 
allocate anything here: + * We will not continue if: + * - Headroom doesn't support allocate minimumAllocation + * - + */ + private boolean checkHeadroom(Resource clusterResource, + ResourceLimits currentResourceLimits, Resource required) { + return Resources.greaterThanOrEqual(rc, clusterResource, + currentResourceLimits.getHeadroom(), required); + } + + private CSAssignment createReservedIncreasedCSAssignment( + SchedulerContainerResourceChangeRequest request) { + CSAssignment assignment = + new CSAssignment(request.getDeltaCapacity(), NodeType.NODE_LOCAL, null, + application, false, false); + Resources.addTo(assignment.getAssignmentInformation().getReserved(), + request.getDeltaCapacity()); + assignment.getAssignmentInformation().incrReservations(); + assignment.getAssignmentInformation().addReservationDetails( + request.getContainerId(), application.getCSLeafQueue().getQueuePath()); + assignment.setIncreasedAllocation(true); + + LOG.info("Reserved increase container request:" + request.toString()); + + return assignment; + } + + private CSAssignment createSuccessfullyIncreasedCSAssignment( + SchedulerContainerResourceChangeRequest request, boolean fromReservation) { + CSAssignment assignment = + new CSAssignment(request.getDeltaCapacity(), NodeType.NODE_LOCAL, null, + application, false, fromReservation); + Resources.addTo(assignment.getAssignmentInformation().getAllocated(), + request.getDeltaCapacity()); + assignment.getAssignmentInformation().incrAllocations(); + assignment.getAssignmentInformation().addAllocationDetails( + request.getContainerId(), application.getCSLeafQueue().getQueuePath()); + assignment.setIncreasedAllocation(true); + + // notify application + application + .getCSLeafQueue() + .getOrderingPolicy() + .containerAllocated(application, + application.getRMContainer(request.getContainerId())); + + LOG.info("Approved increase container request:" + request.toString() + + " fromReservation=" + fromReservation); + + return assignment; + } + + 
private CSAssignment allocateIncreaseRequestFromReservedContainer( + SchedulerNode node, + SchedulerContainerResourceChangeRequest increaseRequest, + RMContainer reservedContainer) { + if (Resources.fitsIn(increaseRequest.getDeltaCapacity(), + node.getAvailableResource())) { + // OK, we can allocate this increase request + // Unreserve it first + application.unreserve((FiCaSchedulerNode) node, + increaseRequest.getPriority()); + + // Notify application + application.increaseContainer(increaseRequest); + + // Notify node + node.increaseContainer(increaseRequest.getContainerId(), + increaseRequest.getDeltaCapacity()); + + return createSuccessfullyIncreasedCSAssignment(increaseRequest, true); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to allocate reserved increase request:" + + increaseRequest.toString() + + ". There's no enough available resource"); + } + + // We still cannot allocate this container, will wait for next turn + return CSAssignment.SKIP_ASSIGNMENT; + } + } + + private CSAssignment allocateIncreaseRequest(FiCaSchedulerNode node, + SchedulerContainerResourceChangeRequest increaseRequest) { + if (Resources.fitsIn(increaseRequest.getDeltaCapacity(), + node.getAvailableResource())) { + // OK, we can allocate this increase request + // Notify application + application.increaseContainer(increaseRequest); + return createSuccessfullyIncreasedCSAssignment(increaseRequest, false); + } else { + boolean reservationSucceeded = + application.reserveIncreasedContainer(increaseRequest.getPriority(), + node, increaseRequest.getRMContainer(), + increaseRequest.getDeltaCapacity()); + + if (reservationSucceeded) { + // We cannot allocate this container, but since queue capacity / + // user-limit matches, we can reserve this container on this node. + return createReservedIncreasedCSAssignment(increaseRequest); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Reserve increase request=" + increaseRequest.toString() + + " failed. 
Skipping.."); + } + return CSAssignment.SKIP_ASSIGNMENT; + } + } + } + + @Override + public CSAssignment assignContainers(Resource clusterResource, + FiCaSchedulerNode node, SchedulingMode schedulingMode, + ResourceLimits resourceLimits, RMContainer reservedContainer) { + AppSchedulingInfo sinfo = application.getAppSchedulingInfo(); + NodeId nodeId = node.getNodeID(); + + if (reservedContainer == null) { + // Do we have increase request on this node? + if (!sinfo.hasIncreaseRequest(nodeId)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Skip allocating increase request since we don't have any" + + " increase request on this node=" + node.getNodeID()); + } + + return CSAssignment.SKIP_ASSIGNMENT; + } + + // Check if we need to unreserve something, note that we don't support + // continuousReservationLooking now. TODO, need think more about how to + // support it. + boolean shouldUnreserve = + Resources.greaterThan(rc, clusterResource, + resourceLimits.getAmountNeededUnreserve(), Resources.none()); + + // Check if we can allocate minimum resource according to headroom + boolean cannotAllocateAnything = + !checkHeadroom(clusterResource, resourceLimits, rmContext + .getScheduler().getMinimumResourceCapability()); + + // Skip the app if we failed either of above check + if (cannotAllocateAnything || shouldUnreserve) { + if (LOG.isDebugEnabled()) { + if (shouldUnreserve) { + LOG.debug("Cannot continue since we have to unreserve some resource" + + ", now increase container allocation doesn't " + + "support continuous reservation looking.."); + } + if (shouldUnreserve) { + LOG.debug("We cannot allocate anything because of low headroom, " + + "headroom=" + resourceLimits.getHeadroom()); + } + } + + return CSAssignment.SKIP_ASSIGNMENT; + } + + // Record increased container id / priority, and remove it from request map if it is satisfied. 
+ ContainerId increasedContainerId = null; + Priority increasedContainerPriority = null; + CSAssignment assigned = null; + + /* + * Loop each priority, and containerId. Container priority is not + * equivalent to request priority, application master can run an important + * task on a less prioritized container. + * + * So behavior here is, we still try to increase container with higher + * priority, but will skip increase request and move to next increase + * request if queue-limit or user-limit aren't satisfied + */ + for (Priority priority : application.getPriorities()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Looking at increase request for application=" + + application.getApplicationAttemptId() + " priority=" + + priority); + } + + /* + * If we have multiple to-be-increased containers under same priority on + * a same host, we will try to increase earlier launched container + * first. And again - we will skip a request and move to next if it + * cannot be allocated. + */ + Map increaseRequestMap = + sinfo.getIncreaseRequests(nodeId, priority); + + // We don't have more increase request on this priority, skip.. 
+ if (null == increaseRequestMap) { + if (LOG.isDebugEnabled()) { + LOG.debug("There's no increase request for " + + application.getApplicationAttemptId() + " priority=" + + priority); + } + continue; + } + Iterator> iter = + increaseRequestMap.entrySet().iterator(); + List toBeRemovedRequests = + new ArrayList<>(); + + while (iter.hasNext()) { + Entry entry = + iter.next(); + SchedulerContainerResourceChangeRequest increaseRequest = + entry.getValue(); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Looking at increase request=" + increaseRequest.toString()); + } + + boolean headroomSatisifed = checkHeadroom(clusterResource, + resourceLimits, increaseRequest.getDeltaCapacity()); + if (!headroomSatisifed) { + // skip if doesn't satisfy headroom limit + if (LOG.isDebugEnabled()) { + LOG.debug(" Headroom is satisifed, skip.."); + } + continue; + } + + RMContainer rmContainer = increaseRequest.getRMContainer(); + if (rmContainer.getContainerState() != ContainerState.RUNNING) { + // if the container is not running, we should remove the + // increaseRequest and continue; + if (LOG.isDebugEnabled()) { + LOG.debug(" Container is not running any more, skip..."); + } + toBeRemovedRequests.add(increaseRequest); + continue; + } + + if (!Resources.fitsIn(increaseRequest.getTargetCapacity(), + node.getTotalResource())) { + // if the target capacity is more than what the node can offer, we + // will simply remove and skip it. + // The reason of doing check here instead of adding increase request + // to scheduler because node's resource could be updated after + // request added. 
+ if (LOG.isDebugEnabled()) { + LOG.debug(" Target capacity is more than what node can offer," + + " node.resource=" + node.getTotalResource()); + } + toBeRemovedRequests.add(increaseRequest); + continue; + } + + // Try to allocate the increase request + assigned = allocateIncreaseRequest(node, increaseRequest); + if (node.getReservedContainer() == null) { + // if it's not a reserved increase request, we will record + // priority/containerId so that we can remove the request later + increasedContainerPriority = priority; + increasedContainerId = rmContainer.getContainerId(); + break; + } + } + + // Remove invalid in request requests + if (!toBeRemovedRequests.isEmpty()) { + for (SchedulerContainerResourceChangeRequest req : toBeRemovedRequests) { + sinfo.removeIncreaseRequest(req.getNodeId(), req.getPriority(), + req.getContainerId()); + } + } + + // We already allocated something + if (assigned != null) { + break; + } + } + + if (increasedContainerId != null) { + // If we increased (not reserved) a new increase request, we should + // remove it from request map. + application.removeIncreaseRequest(nodeId, increasedContainerPriority, + increasedContainerId); + } + + return assigned == null ? 
CSAssignment.SKIP_ASSIGNMENT : assigned; + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Trying to allocate reserved increase container request.."); + } + + // We already reserved this increase container + SchedulerContainerResourceChangeRequest request = + sinfo.getIncreaseRequest(nodeId, reservedContainer.getContainer() + .getPriority(), reservedContainer.getContainerId()); + + // We will cancel the reservation any of following happens + // - Container finished + // - No increase request needed + // - Target resource updated + if (null == request + || reservedContainer.getContainerState() != ContainerState.RUNNING + || Resources.equals(reservedContainer.getReservedResource(), + request.getDeltaCapacity())) { + if (LOG.isDebugEnabled()) { + LOG.debug("We don't need reserved increase container request " + + "for container=" + reservedContainer.getContainerId() + + ". Unreserving and return..."); + } + + // We don't need this container now, just return excessive reservation + return new CSAssignment(application, reservedContainer); + } + + return allocateIncreaseRequestFromReservedContainer(node, request, + reservedContainer); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index dcb99ed..fd99d29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -46,7 +46,7 @@ * Allocate normal (new) containers, considers locality/label, etc. Using * delayed scheduling mechanism to get better locality allocation. */ -public class RegularContainerAllocator extends ContainerAllocator { +public class RegularContainerAllocator extends AbstractContainerAllocator { private static final Log LOG = LogFactory.getLog(RegularContainerAllocator.class); private ResourceRequest lastResourceRequest = null; @@ -56,6 +56,25 @@ public RegularContainerAllocator(FiCaSchedulerApp application, super(application, rc, rmContext); } + private boolean checkHeadroom(Resource clusterResource, + ResourceLimits currentResourceLimits, Resource required, + FiCaSchedulerNode node) { + // If headroom + currentReservation < required, we cannot allocate this + // require + Resource resourceCouldBeUnReserved = application.getCurrentReservation(); + if (!application.getCSLeafQueue().getReservationContinueLooking() + || !node.getPartition().equals(RMNodeLabelsManager.NO_LABEL)) { + // If we don't allow reservation continuous looking, OR we're looking at + // non-default node partition, we won't allow to unreserve before + // allocation. + resourceCouldBeUnReserved = Resources.none(); + } + return Resources.greaterThanOrEqual(rc, clusterResource, Resources.add( + currentResourceLimits.getHeadroom(), resourceCouldBeUnReserved), + required); + } + + private ContainerAllocation preCheckForNewContainer(Resource clusterResource, FiCaSchedulerNode node, SchedulingMode schedulingMode, ResourceLimits resourceLimits, Priority priority) { @@ -97,8 +116,9 @@ private ContainerAllocation preCheckForNewContainer(Resource clusterResource, // Is the node-label-expression of this offswitch resource request // matches the node's label? 
// If not match, jump to next priority. - if (!SchedulerUtils.checkResourceRequestMatchingNodePartition(anyRequest, - node.getPartition(), schedulingMode)) { + if (!SchedulerUtils.checkResourceRequestMatchingNodePartition( + anyRequest.getNodeLabelExpression(), node.getPartition(), + schedulingMode)) { return ContainerAllocation.PRIORITY_SKIPPED; } @@ -388,8 +408,8 @@ private ContainerAllocation assignContainer(Resource clusterResource, } // check if the resource request can access the label - if (!SchedulerUtils.checkResourceRequestMatchingNodePartition(request, - node.getPartition(), schedulingMode)) { + if (!SchedulerUtils.checkResourceRequestMatchingNodePartition( + request.getNodeLabelExpression(), node.getPartition(), schedulingMode)) { // this is a reserved container, but we cannot allocate it now according // to label not match. This can be caused by node label changed // We should un-reserve this container. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/AssignmentInformation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/AssignmentInformation.java index 5158255..71f5513 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/AssignmentInformation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/AssignmentInformation.java @@ -27,7 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; -import 
org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; @InterfaceAudience.Private @InterfaceStability.Unstable diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 74d77f5..38907fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -58,7 +59,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AbstractContainerAllocator; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import 
org.apache.hadoop.yarn.util.resource.Resources; @@ -83,7 +84,7 @@ private ResourceScheduler scheduler; - private ContainerAllocator containerAllocator; + private AbstractContainerAllocator containerAllocator; public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId, String user, Queue queue, ActiveUsersManager activeUsersManager, @@ -118,7 +119,7 @@ public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId, rc = scheduler.getResourceCalculator(); } - containerAllocator = new RegularContainerAllocator(this, rc, rmContext); + containerAllocator = new ContainerAllocator(this, rc, rmContext); } synchronized public boolean containerCompleted(RMContainer rmContainer, @@ -209,13 +210,17 @@ synchronized public RMContainer allocate(NodeType type, FiCaSchedulerNode node, public boolean unreserve(Priority priority, FiCaSchedulerNode node, RMContainer rmContainer) { + if (rmContainer.hasIncreaseReservation()) { + rmContainer.cancelInceaseReservation(); + } + // Done with the reservation? 
if (unreserve(node, priority)) { node.unreserveResource(this); // Update reserved metrics queue.getMetrics().unreserveResource(getUser(), - rmContainer.getContainer().getResource()); + rmContainer.getReservedResource()); return true; } return false; @@ -241,7 +246,7 @@ public synchronized boolean unreserve(FiCaSchedulerNode node, Priority priority) // Reset the re-reservation count resetReReservations(priority); - Resource resource = reservedContainer.getContainer().getResource(); + Resource resource = reservedContainer.getReservedResource(); this.attemptResourceUsage.decReserved(node.getPartition(), resource); LOG.info("Application " + getApplicationId() + " unreserved " @@ -311,13 +316,15 @@ public synchronized Allocation getAllocation(ResourceCalculator rc, ResourceRequest rr = ResourceRequest.newInstance( Priority.UNDEFINED, ResourceRequest.ANY, minimumAllocation, numCont); - ContainersAndNMTokensAllocation allocation = - pullNewlyAllocatedContainersAndNMTokens(); + List newlyAllocatedContainers = pullNewlyAllocatedContainers(); + List newlyIncreasedContainers = pullNewlyIncreasedContainers(); + List newlyDecreasedContainers = pullNewlyDecreasedContainers(); + List updatedNMTokens = pullUpdatedNMTokens(); Resource headroom = getHeadroom(); setApplicationHeadroomForMetrics(headroom); - return new Allocation(allocation.getContainerList(), headroom, null, - currentContPreemption, Collections.singletonList(rr), - allocation.getNMTokenList()); + return new Allocation(newlyAllocatedContainers, headroom, null, + currentContPreemption, Collections.singletonList(rr), updatedNMTokens, + newlyIncreasedContainers, newlyDecreasedContainers); } synchronized public NodeId getNodeIdToUnreserve(Priority priority, @@ -332,15 +339,15 @@ synchronized public NodeId getNodeIdToUnreserve(Priority priority, if ((reservedContainers != null) && (!reservedContainers.isEmpty())) { for (Map.Entry entry : reservedContainers.entrySet()) { NodeId nodeId = entry.getKey(); - Resource 
containerResource = entry.getValue().getContainer().getResource(); + Resource reservedResource = entry.getValue().getReservedResource(); // make sure we unreserve one with at least the same amount of // resources, otherwise could affect capacity limits if (Resources.lessThanOrEqual(rc, clusterResource, - resourceNeedUnreserve, containerResource)) { + resourceNeedUnreserve, reservedResource)) { if (LOG.isDebugEnabled()) { LOG.debug("unreserving node with reservation size: " - + containerResource + + reservedResource + " in order to allocate container with size: " + resourceNeedUnreserve); } return nodeId; @@ -374,6 +381,25 @@ public synchronized void transferStateFromPreviousAttempt( this.headroomProvider = ((FiCaSchedulerApp) appAttempt).getHeadroomProvider(); } + + public boolean reserveIncreasedContainer(Priority priority, + FiCaSchedulerNode node, + RMContainer rmContainer, Resource reservedResource) { + // Inform the application + if (super.reserveIncreasedContainer(node, priority, rmContainer, + reservedResource)) { + + queue.getMetrics().reserveResource(getUser(), reservedResource); + + // Update the node + node.reserveResource(this, priority, rmContainer); + + // Succeeded + return true; + } + + return false; + } public void reserve(Priority priority, FiCaSchedulerNode node, RMContainer rmContainer, Container container) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 3eefb8f..2ce11e3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -19,7 +19,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; @@ -32,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -68,7 +76,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.ContainersAndNMTokensAllocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; @@ -904,7 +913,7 @@ private synchronized void removeNode(RMNode rmNode) { @Override public Allocation 
allocate(ApplicationAttemptId appAttemptId, List ask, List release, - List blacklistAdditions, List blacklistRemovals) { + List blacklistAdditions, List blacklistRemovals, List increaseRequests, List decreaseRequests) { // Make sure this application exists FSAppAttempt application = getSchedulerApp(appAttemptId); @@ -956,18 +965,17 @@ public Allocation allocate(ApplicationAttemptId appAttemptId, } application.updateBlacklist(blacklistAdditions, blacklistRemovals); - ContainersAndNMTokensAllocation allocation = - application.pullNewlyAllocatedContainersAndNMTokens(); + List newlyAllocatedContainers = application.pullNewlyAllocatedContainers(); // Record container allocation time - if (!(allocation.getContainerList().isEmpty())) { + if (!(newlyAllocatedContainers.isEmpty())) { application.recordContainerAllocationTime(getClock().getTime()); } Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); - return new Allocation(allocation.getContainerList(), headroom, - preemptionContainerIds, null, null, allocation.getNMTokenList()); + return new Allocation(newlyAllocatedContainers, headroom, + preemptionContainerIds, null, null, application.pullUpdatedNMTokens()); } } @@ -1692,4 +1700,11 @@ private String handleMoveToPlanQueue(String targetQueueName) { } return targetQueueName; } + + @Override + protected void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + // TODO Auto-generated method stub + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 6b77ceb..95d9171 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -76,7 +77,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.ContainersAndNMTokensAllocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerContainerResourceChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -312,7 +313,7 @@ public synchronized void setRMContext(RMContext rmContext) { @Override public Allocation allocate( ApplicationAttemptId applicationAttemptId, List ask, - List release, List blacklistAdditions, List blacklistRemovals) { + List release, List blacklistAdditions, List blacklistRemovals, List increaseRequests, List decreaseRequests) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if 
(application == null) { LOG.error("Calling allocate on removed " + @@ -357,12 +358,10 @@ public Allocation allocate( } application.updateBlacklist(blacklistAdditions, blacklistRemovals); - ContainersAndNMTokensAllocation allocation = - application.pullNewlyAllocatedContainersAndNMTokens(); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); - return new Allocation(allocation.getContainerList(), headroom, null, - null, null, allocation.getNMTokenList()); + return new Allocation(application.pullNewlyAllocatedContainers(), + headroom, null, null, null, application.pullUpdatedNMTokens()); } } @@ -998,4 +997,12 @@ public synchronized boolean checkAccess(UserGroupInformation callerUGI, public Resource getUsedResource() { return usedResource; } + + @Override + protected void decreaseContainer( + SchedulerContainerResourceChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + // TODO Auto-generated method stub + + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index b1fa80a..c6b954b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -1450,8 +1450,7 @@ protected ContainerLaunchContext createContainerLaunchContext( * object. * * @param credentials - * the CredentialsInfo provided by the user. 
- * @return + * the CredentialsInfo provided by the user */ private Credentials createCredentials(CredentialsInfo credentials) { Credentials ret = new Credentials(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index e62f7d7..b536546 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -323,7 +323,7 @@ private synchronized void addResourceRequest( // Get resources from the ResourceManager Allocation allocation = resourceManager.getResourceScheduler().allocate( applicationAttemptId, new ArrayList(ask), - new ArrayList(), null, null); + new ArrayList(), null, null, null, null); System.out.println("-=======" + applicationAttemptId); System.out.println("----------" + resourceManager.getRMContext().getRMApps() .get(applicationId).getRMAppAttempt(applicationAttemptId)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index 5660b78..8861aa1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -235,6 +236,14 @@ public AllocateResponse allocate( releases, null); return allocate(req); } + + public AllocateResponse allocateChangeContainerRequests( + List increaseRequests, + List decreaseRequests) throws Exception { + final AllocateRequest req = AllocateRequest.newInstance(0, 0F, null, null, + null, increaseRequests, decreaseRequests); + return allocate(req); + } public AllocateResponse allocate(AllocateRequest allocateRequest) throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index 53cb8d0..7ed6861 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -19,11 +19,13 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; import org.apache.hadoop.net.Node; import 
org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; @@ -231,6 +233,17 @@ public long getLastHealthReportTime() { } return CommonNodeLabelsManager.EMPTY_STRING_SET; } + + @Override + public void updateNodeHeartbeatResposneForContainersDecreasing( + NodeHeartbeatResponse response) { + + } + + @Override + public List pullNewlyIncreasedContainers() { + return Collections.emptyList(); + } }; private static RMNode buildRMNode(int rack, final Resource perNode, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 5080355..7f97b60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -189,6 +189,19 @@ public void waitForState(ApplicationAttemptId attemptId, } } + public void waitForContainerState(ContainerId containerId, + RMContainerState state) throws Exception { + int timeoutSecs = 0; + RMContainer container = getResourceScheduler().getRMContainer(containerId); + while ((container == null || container.getState() != state) + && timeoutSecs++ < 40) { + System.out.println( + "Waiting for" + containerId + " state to be:" + state.name()); + Thread.sleep(200); + } + Assert.assertTrue(container.getState() == state); + } + public void waitForContainerAllocated(MockNM nm, ContainerId containerId) throws Exception { 
int timeoutSecs = 0; @@ -756,8 +769,14 @@ public static MockAM launchAM(RMApp app, MockRM rm, MockNM nm) rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.SCHEDULED); System.out.println("Launch AM " + attempt.getAppAttemptId()); nm.nodeHeartbeat(true); + // Must wait for AM container allocated before send AM launched. If AM + // launched event sent before AM container get allocated, Am launched event + // will be ignored. + rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.ALLOCATED); MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId()); rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.LAUNCHED); + // Send AM container launched in next NM heartbeat + nm.nodeHeartbeat(attempt.getAppAttemptId(), 1L, ContainerState.RUNNING); return am; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java index 85d2515..c6dc8e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java @@ -18,44 +18,48 @@ package org.apache.hadoop.yarn.server.resourcemanager; -import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; -import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; -import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import static java.lang.Thread.sleep; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; - +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; -import org.apache.hadoop.yarn.api.records.*; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException; +import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.*; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; import org.apache.hadoop.yarn.server.utils.BuilderUtils; - +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import org.junit.Assert; - -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static java.lang.Thread.sleep; public class TestApplicationMasterService { private static final Log LOG = LogFactory @@ -343,6 +347,92 @@ public void testAllocateAfterUnregister() throws Exception { alloc1Response = am1.schedule(); Assert.assertEquals(0, alloc1Response.getAllocatedContainers().size()); } + + @Test(timeout=60000) + public void testInvalidIncreaseDecreaseRequest() throws Exception { + MockRM rm = new MockRM(conf); + + try { + rm.start(); + + // Register node1 + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB); + + // Submit an application + RMApp app1 = rm.submitApp(1024); + + // kick the scheduling + nm1.nodeHeartbeat(true); + RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); + MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); + RegisterApplicationMasterResponse registerResponse = + am1.registerAppAttempt(); + + // Ask for a normal increase should be successfull + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance( + 
ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.createResource(2048))), + null); + + // Target resource is negative, should fail + boolean exceptionCaught = false; + try { + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.createResource(-1))), + null); + } catch (InvalidResourceRequestException e) { + // This is expected + exceptionCaught = true; + } + Assert.assertTrue(exceptionCaught); + + // Target resource is more than maxAllocation, should fail + try { + am1.allocateChangeContainerRequests( + Arrays + .asList(ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.add( + registerResponse.getMaximumResourceCapability(), + Resources.createResource(1)))), + null); + } catch (InvalidResourceRequestException e) { + // This is expected + exceptionCaught = true; + } + + Assert.assertTrue(exceptionCaught); + + // Contains multiple increase/decrease requests for same contaienrId + try { + am1.allocateChangeContainerRequests( + Arrays + .asList(ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.add( + registerResponse.getMaximumResourceCapability(), + Resources.createResource(1)))), + Arrays + .asList(ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.add( + registerResponse.getMaximumResourceCapability(), + Resources.createResource(1))))); + } catch (InvalidResourceRequestException e) { + // This is expected + exceptionCaught = true; + } + + Assert.assertTrue(exceptionCaught); + } finally { + if (rm != null) { + rm.close(); + } + } + } private static class MyResourceManager extends MockRM { diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java index fccfa19..cd2d9bb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java @@ -165,7 +165,7 @@ public void testLogAggregationStatus() throws Exception { node1ReportForApp.add(report1); node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node1ReportForApp)); + null, node1ReportForApp, null)); List node2ReportForApp = new ArrayList(); @@ -177,7 +177,7 @@ public void testLogAggregationStatus() throws Exception { node2ReportForApp.add(report2); node2.handle(new RMNodeStatusEvent(node2.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node2ReportForApp)); + null, node2ReportForApp, null)); // node1 and node2 has updated its log aggregation status // verify that the log aggregation status for node1, node2 // has been changed @@ -215,7 +215,7 @@ public void testLogAggregationStatus() throws Exception { node1ReportForApp2.add(report1_2); node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node1ReportForApp2)); + null, node1ReportForApp2, null)); // verify that the log aggregation status for node1 // has been 
changed @@ -284,7 +284,7 @@ public void testLogAggregationStatus() throws Exception { // 10 diagnostic messages/failure messages node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node1ReportForApp3)); + null, node1ReportForApp3, null)); logAggregationStatus = rmApp.getLogAggregationReportsForApp(); Assert.assertEquals(2, logAggregationStatus.size()); @@ -329,7 +329,7 @@ public void testLogAggregationStatus() throws Exception { node2ReportForApp2.add(report2_3); node2.handle(new RMNodeStatusEvent(node2.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node2ReportForApp2)); + null, node2ReportForApp2, null)); Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport()); logAggregationStatus = rmApp.getLogAggregationReportsForApp(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 10ec453..828e149 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -465,10 +465,9 @@ private void testAppAttemptScheduledState() { expectedAllocateCount = 1; } - assertEquals(expectedState, - applicationAttempt.getAppAttemptState()); - verify(scheduler, times(expectedAllocateCount)). 
- allocate(any(ApplicationAttemptId.class), + assertEquals(expectedState, applicationAttempt.getAppAttemptState()); + verify(scheduler, times(expectedAllocateCount)).allocate( + any(ApplicationAttemptId.class), any(List.class), any(List.class), any(List.class), any(List.class), any(List.class), any(List.class)); assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); @@ -488,11 +487,9 @@ private void testAppAttemptAllocatedState(Container amContainer) { assertEquals(amContainer, applicationAttempt.getMasterContainer()); // Check events verify(applicationMasterLauncher).handle(any(AMLauncherEvent.class)); - verify(scheduler, times(2)). - allocate( - any( - ApplicationAttemptId.class), any(List.class), any(List.class), - any(List.class), any(List.class)); + verify(scheduler, times(2)).allocate(any(ApplicationAttemptId.class), + any(List.class), any(List.class), any(List.class), any(List.class), + any(List.class), any(List.class)); verify(nmTokenManager).clearNodeSetForAttempt( applicationAttempt.getAppAttemptId()); } @@ -641,13 +638,9 @@ private Container allocateApplicationAttempt() { Allocation allocation = mock(Allocation.class); when(allocation.getContainers()). thenReturn(Collections.singletonList(container)); - when( - scheduler.allocate( - any(ApplicationAttemptId.class), - any(List.class), - any(List.class), - any(List.class), - any(List.class))). + when(scheduler.allocate(any(ApplicationAttemptId.class), any(List.class), + any(List.class), any(List.class), any(List.class), any(List.class), + any(List.class))). thenReturn(allocation); RMContainer rmContainer = mock(RMContainerImpl.class); when(scheduler.getRMContainer(container.getId())). 
@@ -1511,10 +1504,9 @@ public void testContainersCleanupForLastAttempt() { @Test public void testScheduleTransitionReplaceAMContainerRequestWithDefaults() { YarnScheduler mockScheduler = mock(YarnScheduler.class); - when( - mockScheduler.allocate(any(ApplicationAttemptId.class), - any(List.class), any(List.class), any(List.class), any(List.class))) - .thenAnswer(new Answer() { + when(mockScheduler.allocate(any(ApplicationAttemptId.class), + any(List.class), any(List.class), any(List.class), any(List.class), + any(List.class), any(List.class))).thenAnswer(new Answer() { @SuppressWarnings("rawtypes") @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java index e4e2049..c3064fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java @@ -21,7 +21,6 @@ import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyLong; -import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; @@ -235,12 +234,123 @@ public void testExpireWhileRunning() { rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, RMContainerEventType.EXPIRE)); drainDispatcher.await(); - assertEquals(RMContainerState.RUNNING, rmContainer.getState()); + 
assertEquals(RMContainerState.EXPIRED, rmContainer.getState()); verify(writer, never()).containerFinished(any(RMContainer.class)); verify(publisher, never()).containerFinished(any(RMContainer.class), anyLong()); } + private void testExpireAfterIncreased(boolean acquired) { + /* + * Similar to previous test, a container is increased but not acquired by + * AM. In this case, if a container is expired, the container should be + * finished. + */ + DrainDispatcher drainDispatcher = new DrainDispatcher(); + EventHandler appAttemptEventHandler = mock(EventHandler.class); + EventHandler generic = mock(EventHandler.class); + drainDispatcher.register(RMAppAttemptEventType.class, + appAttemptEventHandler); + drainDispatcher.register(RMNodeEventType.class, generic); + drainDispatcher.init(new YarnConfiguration()); + drainDispatcher.start(); + NodeId nodeId = BuilderUtils.newNodeId("host", 3425); + ApplicationId appId = BuilderUtils.newApplicationId(1, 1); + ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( + appId, 1); + ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); + ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class); + + Resource resource = BuilderUtils.newResource(512, 1); + Priority priority = BuilderUtils.newPriority(5); + + Container container = BuilderUtils.newContainer(containerId, nodeId, + "host:3465", resource, priority, null); + + RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class); + SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); + RMContext rmContext = mock(RMContext.class); + when(rmContext.getDispatcher()).thenReturn(drainDispatcher); + when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer); + when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); + when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); + when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); + ConcurrentMap apps = + 
new ConcurrentHashMap(); + apps.put(appId, mock(RMApp.class)); + when(rmContext.getRMApps()).thenReturn(apps); + RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, + nodeId, "user", rmContext); + + assertEquals(RMContainerState.NEW, rmContainer.getState()); + assertEquals(resource, rmContainer.getAllocatedResource()); + assertEquals(nodeId, rmContainer.getAllocatedNode()); + assertEquals(priority, rmContainer.getAllocatedPriority()); + verify(writer).containerStarted(any(RMContainer.class)); + verify(publisher).containerCreated(any(RMContainer.class), anyLong()); + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.START)); + drainDispatcher.await(); + assertEquals(RMContainerState.ALLOCATED, rmContainer.getState()); + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.ACQUIRED)); + drainDispatcher.await(); + assertEquals(RMContainerState.ACQUIRED, rmContainer.getState()); + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.LAUNCHED)); + drainDispatcher.await(); + assertEquals(RMContainerState.RUNNING, rmContainer.getState()); + assertEquals("http://host:3465/node/containerlogs/container_1_0001_01_000001/user", + rmContainer.getLogURL()); + + // newResource is more than the old resource + Resource newResource = BuilderUtils.newResource(1024, 2); + rmContainer.handle(new RMContainerChangeResourceEvent(containerId, + newResource, true)); + + if (acquired) { + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.ACQUIRE_UPDATED_CONTAINER)); + drainDispatcher.await(); + // status is still RUNNING since this is a increased container acquired by + // AM + assertEquals(RMContainerState.RUNNING, rmContainer.getState()); + } + + // In RUNNING state. Verify EXPIRE and associated actions. 
+ reset(appAttemptEventHandler); + ContainerStatus containerStatus = SchedulerUtils + .createAbnormalContainerStatus(containerId, + SchedulerUtils.EXPIRED_CONTAINER); + rmContainer.handle(new RMContainerFinishedEvent(containerId, + containerStatus, RMContainerEventType.EXPIRE)); + drainDispatcher.await(); + assertEquals(RMContainerState.EXPIRED, rmContainer.getState()); + + // Container will be finished only when it is acquired by AM after increase, + // we will only notify expirer when it is acquired by AM. + if (acquired) { + verify(writer, times(1)).containerFinished(any(RMContainer.class)); + verify(publisher, times(1)).containerFinished(any(RMContainer.class), + anyLong()); + } else { + verify(writer, never()).containerFinished(any(RMContainer.class)); + verify(publisher, never()).containerFinished(any(RMContainer.class), + anyLong()); + } + } + + @Test + public void testExpireAfterContainerResourceIncreased() throws Exception { + // expire after increased and acquired by AM + testExpireAfterIncreased(true); + // expire after increased but not acquired by AM + testExpireAfterIncreased(false); + } + @Test public void testExistenceOfResourceRequestInRMContainer() throws Exception { Configuration conf = new Configuration(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index aff169e..8a6b019 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; @@ -671,11 +672,11 @@ public void testBlackListNodes() throws Exception { // Verify the blacklist can be updated independent of requesting containers cs.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), - Collections.singletonList(host), null); + Collections.singletonList(host), null, null, null); Assert.assertTrue(cs.getApplicationAttempt(appAttemptId).isBlacklisted(host)); cs.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), null, - Collections.singletonList(host)); + Collections.singletonList(host), null, null); Assert.assertFalse(cs.getApplicationAttempt(appAttemptId).isBlacklisted(host)); rm.stop(); } @@ -763,7 +764,7 @@ public void testAllocateReorder() throws Exception { cs.allocate(appAttemptId1, Collections.singletonList(r1), Collections.emptyList(), - null, null); + null, null, null, null); //And this will result in container assignment for app1 CapacityScheduler.schedule(cs); @@ -780,7 +781,7 @@ public void testAllocateReorder() throws Exception { cs.allocate(appAttemptId2, Collections.singletonList(r2), Collections.emptyList(), - null, null); + null, null, null, null); //In this case we do not perform container assignment because we want to //verify re-ordering based on the allocation alone @@ -2891,7 +2892,7 @@ public void testApplicationHeadRoom() throws Exception { Allocation 
allocate = cs.allocate(appAttemptId, Collections. emptyList(), - Collections. emptyList(), null, null); + Collections. emptyList(), null, null, null, null); Assert.assertNotNull(attempt); @@ -2907,7 +2908,7 @@ public void testApplicationHeadRoom() throws Exception { allocate = cs.allocate(appAttemptId, Collections. emptyList(), - Collections. emptyList(), null, null); + Collections. emptyList(), null, null, null, null); // All resources should be sent as headroom Assert.assertEquals(newResource, allocate.getResourceLimit()); @@ -2951,7 +2952,105 @@ public void testAMLimitUsage() throws Exception { config.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName()); verifyAMLimitForLeafQueue(config); + } + + private FiCaSchedulerApp getFiCaSchedulerApp(MockRM rm, + ApplicationId appId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + return cs.getSchedulerApplications().get(appId).getCurrentAppAttempt(); + } + @Test + public void testPendingResourceUpdatedAccordingToIncreaseRequestChanges() + throws Exception { + Configuration conf = + TestUtils.getConfigurationWithQueueLabels(new Configuration(false)); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + + final RMNodeLabelsManager mgr = new NullRMNodeLabelsManager(); + mgr.init(conf); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + MockRM rm = new MockRM(conf, memStore) { + protected RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + + rm.start(); + + MockNM nm1 = // label = "" + new MockNM("h1:1234", 200 * GB, rm.getResourceTrackerService()); + nm1.registerNode(); + + // Launch app1 in queue=a1 + RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "a1"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 
2)), + null); + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + ContainerId containerId3 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 3); + Assert.assertTrue(rm.waitForState(nm1, containerId3, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them + am1.allocate(null, null); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2L, + ContainerState.RUNNING); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3L, + ContainerState.RUNNING); + + // am1 asks to change its AM container from 1GB to 3GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(3 * GB))), + null); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm, app1.getApplicationId()); + + Assert.assertEquals(2 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + checkPendingResource(rm, "a1", 2 * GB, null); + checkPendingResource(rm, "a", 2 * GB, null); + checkPendingResource(rm, "root", 2 * GB, null); + + // am1 asks to change containerId2 (2G -> 3G) and containerId3 (2G -> 5G) + am1.allocateChangeContainerRequests(Arrays.asList( + ContainerResourceChangeRequest.newInstance(containerId2, + Resources.createResource(3 * GB)), + ContainerResourceChangeRequest.newInstance(containerId3, + Resources.createResource(5 * GB))), + null); + + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + checkPendingResource(rm, "a1", 6 * GB, null); + checkPendingResource(rm, "a", 6 * GB, null); + checkPendingResource(rm, "root", 6 * GB, null); + + // am1 asks to change containerId1 (1G->3G), containerId2 (2G -> 4G) and + // containerId3 (2G -> 2G) + am1.allocateChangeContainerRequests(Arrays.asList( + ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(3 * GB)), + 
ContainerResourceChangeRequest.newInstance(containerId2, + Resources.createResource(4 * GB)), + ContainerResourceChangeRequest.newInstance(containerId3, + Resources.createResource(2 * GB))), + null); + Assert.assertEquals(4 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + checkPendingResource(rm, "a1", 4 * GB, null); + checkPendingResource(rm, "a", 4 * GB, null); + checkPendingResource(rm, "root", 4 * GB, null); } private void verifyAMLimitForLeafQueue(CapacitySchedulerConfiguration config) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java index 9dcab2e..88c7c13 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java @@ -132,11 +132,11 @@ public CSAssignment answer(InvocationOnMock invocation) throws Throwable { final Resource allocatedResource = Resources.createResource(allocation); if (queue instanceof ParentQueue) { ((ParentQueue)queue).allocateResource(clusterResource, - allocatedResource, RMNodeLabelsManager.NO_LABEL); + allocatedResource, RMNodeLabelsManager.NO_LABEL, false); } else { FiCaSchedulerApp app1 = getMockApplication(0, ""); ((LeafQueue)queue).allocateResource(clusterResource, app1, - allocatedResource, null, null); + allocatedResource, null, null, false); } // Next call - nothing diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index 769041b..b5b2222 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -30,7 +30,6 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -60,9 +59,6 @@ import org.junit.Before; import org.junit.Test; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - public class TestContainerAllocation { @@ -199,13 +195,16 @@ public void testNormalContainerAllocationWhenDNSUnavailable() throws Exception{ // acquire the container. 
SecurityUtilTestHelper.setTokenServiceUseIp(true); - List containers = - am1.allocate(new ArrayList(), - new ArrayList()).getAllocatedContainers(); - // not able to fetch the container; - Assert.assertEquals(0, containers.size()); - - SecurityUtilTestHelper.setTokenServiceUseIp(false); + List containers; + try { + containers = + am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + // not able to fetch the container; + Assert.assertEquals(0, containers.size()); + } finally { + SecurityUtilTestHelper.setTokenServiceUseIp(false); + } containers = am1.allocate(new ArrayList(), new ArrayList()).getAllocatedContainers(); @@ -315,21 +314,24 @@ protected RMSecretManagerService createRMSecretManagerService() { rm1.start(); MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000); - SecurityUtilTestHelper.setTokenServiceUseIp(true); - RMApp app1 = rm1.submitApp(200); - RMAppAttempt attempt = app1.getCurrentAppAttempt(); - nm1.nodeHeartbeat(true); - - // fetching am container will fail, keep retrying 5 times. - while (numRetries <= 5) { + RMApp app1; + try { + SecurityUtilTestHelper.setTokenServiceUseIp(true); + app1 = rm1.submitApp(200); + RMAppAttempt attempt = app1.getCurrentAppAttempt(); nm1.nodeHeartbeat(true); - Thread.sleep(1000); - Assert.assertEquals(RMAppAttemptState.SCHEDULED, - attempt.getAppAttemptState()); - System.out.println("Waiting for am container to be allocated."); - } - SecurityUtilTestHelper.setTokenServiceUseIp(false); + // fetching am container will fail, keep retrying 5 times. 
+ while (numRetries <= 5) { + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + Assert.assertEquals(RMAppAttemptState.SCHEDULED, + attempt.getAppAttemptState()); + System.out.println("Waiting for am container to be allocated."); + } + } finally { + SecurityUtilTestHelper.setTokenServiceUseIp(false); + } MockRM.launchAndRegisterAM(app1, rm1, nm1); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java new file mode 100644 index 0000000..4c85ec9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java @@ -0,0 +1,919 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; +import org.junit.Before; 
+import org.junit.Test; + +public class TestContainerResizing { + private final int GB = 1024; + + private YarnConfiguration conf; + + RMNodeLabelsManager mgr; + + @Before + public void setUp() throws Exception { + conf = new YarnConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + mgr = new NullRMNodeLabelsManager(); + mgr.init(conf); + } + + @Test + public void testSimpleIncreaseContainer() throws Exception { + /** + * Application has a container running, and the node has enough available + * resource. Add a increase request to see if container will be increased + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + // am1 asks to change its AM container from 1GB to 3GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(3 * GB))), + null); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + checkPendingResource(rm1, "default", 2 * GB, null); + Assert.assertEquals(2 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + // Pending resource should be deducted + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + verifyContainerIncreased(am1.allocate(null, null), containerId1, 
3 * GB); + rm1.close(); + } + + @Test + public void testSimpleDecreaseContainer() throws Exception { + /** + * Application has a container running, try to decrease the container and + * check queue's usage and container resource will be updated. + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(3 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + checkUsedResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + // am1 asks to change its AM container from 1GB to 3GB + AllocateResponse response = am1.allocateChangeContainerRequests(null, + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(1 * GB)))); + + verifyContainerDecreased(response, containerId1, 1 * GB); + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + rm1.close(); + } + + @Test + public void testSimpleIncreaseRequestReservation() throws Exception { + /** + * Application has two containers running, try to increase one of then, node + * doesn't have enough resource, so the increase request will be reserved. + * Check resource usage after container reserved, finish a container, the + * reserved container should be allocated. 
+ */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2L, + ContainerState.RUNNING); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + + // am1 asks to change its AM container from 1GB to 3GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(7 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId1); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer1.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer1.getReservedResource().getMemory()); + 
Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete one container and do another allocation + am1.allocate(null, Arrays.asList(containerId2)); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + // Now container should be increased + verifyContainerIncreased(am1.allocate(null, null), containerId1, 7 * GB); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 7 * GB, null); + Assert.assertEquals(7 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(7 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void 
testExcessiveReservationWhenCancelIncreaseRequest() + throws Exception { + /** + * Application has two containers running, try to increase one of then, node + * doesn't have enough resource, so the increase request will be reserved. + * Check resource usage after container reserved, finish a container & + * cancel the increase request, reservation should be cancelled + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2L, + ContainerState.RUNNING); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + + // am1 asks to change its AM container from 1GB to 3GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(7 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + 
cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId1); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer1.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer1.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete one container and cancel increase request (via send a increase + // request, make target_capacity=existing_capacity) + am1.allocate(null, Arrays.asList(containerId2)); + // am1 asks to change its AM container from 1G to 1G (cancel the increase + // request actually) + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(1 * GB))), + null); + // Trigger a node heartbeat.. 
+ cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testExcessiveReservationWhenDecreaseSameContainer() + throws Exception { + /** + * Very similar to testExcessiveReservationWhenCancelIncreaseRequest, after + * the increase request reserved, it decreases the reserved container, + * container should be decreased and reservation will be cancelled + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(2 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + 
ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2L, + ContainerState.RUNNING); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + + // am1 asks to change its AM container from 2GB to 8GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId1); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer1.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer1.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 10 * GB, null); + Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(4 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + 
app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete one container and cancel increase request (via send a increase + // request, make target_capacity=existing_capacity) + am1.allocate(null, Arrays.asList(containerId2)); + // am1 asks to change its AM container from 2G to 1G (decrease) + am1.allocateChangeContainerRequests( + null, + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId1, + Resources.createResource(1 * GB))) + ); + // Trigger a node heartbeat.. + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testIncreaseContainerUnreservedWhenContainerCompleted() + throws Exception { + /** + * App has two containers on the same node (node.resource = 8G), container1 + * = 2G, container2 = 2G. App asks to increase container2 to 8G. + * + * So increase container request will be reserved. When app releases + * container2, reserved part should be released as well. 
+ */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2L, + ContainerState.RUNNING); + rm1.waitForContainerState(containerId2, RMContainerState.RUNNING); + + // am1 asks to change its AM container from 2GB to 8GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId2, + Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer2 = app.getLiveContainersMap().get(containerId2); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer2.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer2.getReservedResource().getMemory()); + 
Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete container2, container will be unreserved and completed + am1.allocate(null, Arrays.asList(containerId2)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer2.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testIncreaseContainerUnreservedWhenApplicationCompleted() + throws Exception { + /** + * Similar to testIncreaseContainerUnreservedWhenContainerCompleted, when + * application finishes, reserved 
increase container should be cancelled + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2L, + ContainerState.RUNNING); + + // am1 asks to change its AM container from 2GB to 8GB + am1.allocateChangeContainerRequests( + Arrays.asList(ContainerResourceChangeRequest.newInstance(containerId2, + Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer2 = app.getLiveContainersMap().get(containerId2); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer2.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer2.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + 
Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Kill the application + cs.handle(new AppAttemptRemovedSchedulerEvent(am1.getApplicationAttemptId(), + RMAppAttemptState.KILLED, false)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer2.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + private void allocateAndLaunchContainers(MockAM am, MockNM nm, MockRM rm, + int nContainer, int mem, int priority, int startContainerId) + throws Exception { + am.allocate(Arrays + .asList(ResourceRequest.newInstance(Priority.newInstance(priority), "*", + 
Resources.createResource(mem), nContainer)), + null); + ContainerId lastContainerId = ContainerId.newContainerId( + am.getApplicationAttemptId(), startContainerId + nContainer - 1); + Assert.assertTrue(rm.waitForState(nm, lastContainerId, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am.allocate(null, null); + + for (int cId = startContainerId; cId < startContainerId + + nContainer; cId++) { + nm.nodeHeartbeat(am.getApplicationAttemptId(), cId, + ContainerState.RUNNING); + rm.waitForContainerState( + ContainerId.newContainerId(am.getApplicationAttemptId(), cId), + RMContainerState.RUNNING); + } + } + + @Test + public void testOrderOfIncreaseContainerRequestAllocation() + throws Exception { + /** + * There're multiple containers need to be increased, check container will + * be increase sorted by priority, if priority is same, smaller containerId + * container will get preferred + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + ApplicationAttemptId attemptId = am1.getApplicationAttemptId(); + + // Container 2, 3 (priority=3) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 3, 2); + + // Container 4, 5 (priority=2) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 2, 4); + + // Container 6, 7 (priority=4) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 4, 6); + + // am1 asks to change its container[2-7] from 1G to 2G + List increaseRequests = new ArrayList<>(); + for (int cId = 2; cId <= 7; cId++) { + ContainerId containerId = + ContainerId.newContainerId(am1.getApplicationAttemptId(), cId); + 
increaseRequests.add(ContainerResourceChangeRequest + .newInstance(containerId, Resources.createResource(2 * GB))); + } + am1.allocateChangeContainerRequests(increaseRequests, null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // Get rmNode1 + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + // assignContainer, container-4/5/2 increased (which has highest priority OR + // earlier allocated) + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + AllocateResponse allocateResponse = am1.allocate(null, null); + Assert.assertEquals(3, allocateResponse.getIncreasedContainers().size()); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 4), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 5), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 2), 2 * GB); + + /* Check statuses after allocation */ + // There're still 3 pending increase requests + checkPendingResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 10 * GB, null); + Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(10 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testIncreaseContainerRequestGetPreferrence() + throws Exception { + /** + * There're multiple containers need to be increased, and there're several + * container allocation request, scheduler will try to increase container + * before 
allocate new containers + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + ApplicationAttemptId attemptId = am1.getApplicationAttemptId(); + + // Container 2, 3 (priority=3) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 3, 2); + + // Container 4, 5 (priority=2) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 2, 4); + + // Container 6, 7 (priority=4) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 4, 6); + + // am1 asks to change its container[2-7] from 1G to 2G + List increaseRequests = new ArrayList<>(); + for (int cId = 2; cId <= 7; cId++) { + ContainerId containerId = + ContainerId.newContainerId(am1.getApplicationAttemptId(), cId); + increaseRequests.add(ContainerResourceChangeRequest + .newInstance(containerId, Resources.createResource(2 * GB))); + } + am1.allocateChangeContainerRequests(increaseRequests, null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // Get rmNode1 + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + // assignContainer, container-4/5/2 increased (which has highest priority OR + // earlier allocated) + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + AllocateResponse allocateResponse = am1.allocate(null, null); + Assert.assertEquals(3, allocateResponse.getIncreasedContainers().size()); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 4), 2 * GB); + verifyContainerIncreased(allocateResponse, + 
ContainerId.newContainerId(attemptId, 5), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 2), 2 * GB); + + /* Check statuses after allocation */ + // There're still 3 pending increase requests + checkPendingResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 10 * GB, null); + Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(10 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + private void checkPendingResource(MockRM rm, String queueName, int memory, + String label) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + CSQueue queue = cs.getQueue(queueName); + Assert.assertEquals(memory, + queue.getQueueResourceUsage() + .getPending(label == null ? RMNodeLabelsManager.NO_LABEL : label) + .getMemory()); + } + + private void checkUsedResource(MockRM rm, String queueName, int memory, + String label) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + CSQueue queue = cs.getQueue(queueName); + Assert.assertEquals(memory, + queue.getQueueResourceUsage() + .getUsed(label == null ? 
RMNodeLabelsManager.NO_LABEL : label) + .getMemory()); + } + + private void verifyContainerIncreased(AllocateResponse response, + ContainerId containerId, int mem) { + List increasedContainers = response.getIncreasedContainers(); + boolean found = false; + for (Container c : increasedContainers) { + if (c.getId().equals(containerId)) { + found = true; + Assert.assertEquals(mem, c.getResource().getMemory()); + } + } + if (!found) { + Assert.fail("Container not increased: containerId=" + containerId); + } + } + + private void verifyContainerDecreased(AllocateResponse response, + ContainerId containerId, int mem) { + List decreasedContainers = response.getDecreasedContainers(); + boolean found = false; + for (Container c : decreasedContainers) { + if (c.getId().equals(containerId)) { + found = true; + Assert.assertEquals(mem, c.getResource().getMemory()); + } + } + if (!found) { + Assert.fail("Container not decreased: containerId=" + containerId); + } + } + + private FiCaSchedulerApp getFiCaSchedulerApp(MockRM rm, + ApplicationId appId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + return cs.getSchedulerApplications().get(appId).getCurrentAppAttempt(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index fe8be06..b85c697 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java 
@@ -770,9 +770,9 @@ public void testComputeUserLimitAndSetHeadroom(){ qb.finishApplication(app_0.getApplicationId(), user_0); qb.finishApplication(app_2.getApplicationId(), user_1); qb.releaseResource(clusterResource, app_0, app_0.getResource(u0Priority), - null, null); + null, null, false); qb.releaseResource(clusterResource, app_2, app_2.getResource(u1Priority), - null, null); + null, null, false); qb.setUserLimit(50); qb.setUserLimitFactor(1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java index ef35093..4a815f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java @@ -144,11 +144,11 @@ public CSAssignment answer(InvocationOnMock invocation) throws Throwable { final Resource allocatedResource = Resources.createResource(allocation); if (queue instanceof ParentQueue) { ((ParentQueue)queue).allocateResource(clusterResource, - allocatedResource, RMNodeLabelsManager.NO_LABEL); + allocatedResource, RMNodeLabelsManager.NO_LABEL, false); } else { FiCaSchedulerApp app1 = getMockApplication(0, ""); ((LeafQueue)queue).allocateResource(clusterResource, app1, - allocatedResource, null, null); + allocatedResource, null, null, false); } // Next call - nothing diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index 403c8ea..52f8430 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -163,7 +163,7 @@ protected ApplicationAttemptId createSchedulingRequest( resourceManager.getRMContext().getRMApps() .put(id.getApplicationId(), rmApp); - scheduler.allocate(id, ask, new ArrayList(), null, null); + scheduler.allocate(id, ask, new ArrayList(), null, null, null, null); return id; } @@ -189,7 +189,7 @@ protected ApplicationAttemptId createSchedulingRequest(String queueId, resourceManager.getRMContext().getRMApps() .put(id.getApplicationId(), rmApp); - scheduler.allocate(id, ask, new ArrayList(), null, null); + scheduler.allocate(id, ask, new ArrayList(), null, null, null, null); return id; } @@ -211,7 +211,7 @@ protected void createSchedulingRequestExistingApplication( ResourceRequest request, ApplicationAttemptId attId) { List ask = new ArrayList(); ask.add(request); - scheduler.allocate(attId, ask, new ArrayList(), null, null); + scheduler.allocate(attId, ask, new ArrayList(), null, null, null, null); } protected void createApplicationWithAMResource(ApplicationAttemptId attId, diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java index 53382de..65c80a6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java @@ -99,7 +99,7 @@ public void testSchedulingDelay() throws InterruptedException { List ask = new ArrayList<>(); ask.add(createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true)); scheduler.allocate( - appAttemptId, ask, new ArrayList(), null, null); + appAttemptId, ask, new ArrayList(), null, null, null, null); FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId); // Advance time and let continuous scheduling kick in diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index c352cc9..61ea069 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -1372,7 +1372,7 @@ public void testQueueDemandCalculation() throws Exception { ResourceRequest request1 = createResourceRequest(minReqSize * 2, ResourceRequest.ANY, 1, 1, true); ask1.add(request1); - scheduler.allocate(id11, ask1, new ArrayList(), null, null); + scheduler.allocate(id11, ask1, new ArrayList(), null, null, null, null); // Second ask, queue2 requests 1 large + (2 * minReqSize) List ask2 = new ArrayList(); @@ -1382,14 +1382,14 @@ public void testQueueDemandCalculation() throws Exception { false); ask2.add(request2); ask2.add(request3); - scheduler.allocate(id21, ask2, new ArrayList(), null, null); + scheduler.allocate(id21, ask2, new ArrayList(), null, null, null, null); // Third ask, queue2 requests 1 large List ask3 = new ArrayList(); ResourceRequest request4 = createResourceRequest(2 * minReqSize, ResourceRequest.ANY, 1, 1, true); ask3.add(request4); - scheduler.allocate(id22, ask3, new ArrayList(), null, null); + scheduler.allocate(id22, ask3, new ArrayList(), null, null, null, null); scheduler.update(); @@ -2711,7 +2711,7 @@ public void testReservationWhileMultiplePriorities() throws IOException { // Complete container scheduler.allocate(attId, new ArrayList(), - Arrays.asList(containerId), null, null); + Arrays.asList(containerId), null, null, null, null); assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB()); assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores()); @@ -2803,7 +2803,7 @@ public void testMultipleNodesSingleRackRequest() throws Exception { asks.add(createResourceRequest(1024, ResourceRequest.ANY, 1, 2, true)); scheduler.allocate(attemptId, asks, new ArrayList(), null, - null); + null, null, null); // node 1 checks in scheduler.update(); @@ -3199,7 +3199,7 @@ public void testCancelStrictLocality() throws 
IOException { createResourceRequest(1024, node1.getHostName(), 1, 0, true), createResourceRequest(1024, "rack1", 1, 0, true), createResourceRequest(1024, ResourceRequest.ANY, 1, 1, true)); - scheduler.allocate(attId1, update, new ArrayList(), null, null); + scheduler.allocate(attId1, update, new ArrayList(), null, null, null, null); // then node2 should get the container scheduler.handle(node2UpdateEvent); @@ -3246,7 +3246,7 @@ public void testReservationsStrictLocality() throws IOException { anyRequest = createResourceRequest(1024, ResourceRequest.ANY, 1, 1, false); scheduler.allocate(attId, Arrays.asList(rackRequest, anyRequest), - new ArrayList(), null, null); + new ArrayList(), null, null, null, null); scheduler.handle(nodeUpdateEvent); assertEquals(0, app.getReservedContainers().size()); @@ -4248,7 +4248,7 @@ public void testContinuousScheduling() throws Exception { ResourceRequest request = createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); ask.add(request); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); // waiting for continuous_scheduler_sleep_time // at least one pass @@ -4268,7 +4268,7 @@ public void testContinuousScheduling() throws Exception { ask.clear(); ask.add(request); scheduler.stop(); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); scheduler.continuousSchedulingAttempt(); Assert.assertEquals(2048, app.getCurrentConsumption().getMemory()); Assert.assertEquals(2, app.getCurrentConsumption().getVirtualCores()); @@ -4340,7 +4340,7 @@ public void testSchedulingOnRemovedNode() throws Exception { ask1.add(request1); scheduler.allocate(id11, ask1, new ArrayList(), null, - null); + null, null, null); String hostName = "127.0.0.1"; RMNode node1 = MockNodes.newNodeInfo(1, @@ -4472,7 +4472,7 @@ public void 
testRecoverRequestAfterPreemption() throws Exception { List containers = scheduler.allocate(appAttemptId, Collections. emptyList(), - Collections. emptyList(), null, null).getContainers(); + Collections. emptyList(), null, null, null, null).getContainers(); // Now with updated ResourceRequest, a container is allocated for AM. Assert.assertTrue(containers.size() == 1); @@ -4501,11 +4501,11 @@ public void testBlacklistNodes() throws Exception { // Verify the blacklist can be updated independent of requesting containers scheduler.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), - Collections.singletonList(host), null); + Collections.singletonList(host), null, null, null); assertTrue(app.isBlacklisted(host)); scheduler.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), null, - Collections.singletonList(host)); + Collections.singletonList(host), null, null); assertFalse(scheduler.getSchedulerApp(appAttemptId).isBlacklisted(host)); List update = Arrays.asList( @@ -4514,7 +4514,7 @@ public void testBlacklistNodes() throws Exception { // Verify a container does not actually get placed on the blacklisted host scheduler.allocate(appAttemptId, update, Collections.emptyList(), - Collections.singletonList(host), null); + Collections.singletonList(host), null, null, null); assertTrue(app.isBlacklisted(host)); scheduler.update(); scheduler.handle(updateEvent); @@ -4524,7 +4524,7 @@ public void testBlacklistNodes() throws Exception { // Verify a container gets placed on the empty blacklist scheduler.allocate(appAttemptId, update, Collections.emptyList(), null, - Collections.singletonList(host)); + Collections.singletonList(host), null, null); assertFalse(app.isBlacklisted(host)); createSchedulingRequest(GB, "root.default", "user", 1); scheduler.update(); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 83d2ccf..6a2e7a7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -263,7 +263,7 @@ public void testNodeLocalAssignment() throws Exception { ask.add(nodeLocal); ask.add(rackLocal); ask.add(any); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0); @@ -365,7 +365,7 @@ public void testUpdateResourceOnNode() throws Exception { ask.add(nodeLocal); ask.add(rackLocal); ask.add(any); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); // Before the node update event, there are one local request Assert.assertEquals(1, nodeLocal.getNumContainers()); @@ -941,7 +941,7 @@ public void testBlackListNodes() throws Exception { ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1)); fs.allocate(appAttemptId1, ask1, emptyId, - Collections.singletonList(host_1_0), null); + Collections.singletonList(host_1_0), null, null, null); // Trigger container assignment fs.handle(new NodeUpdateSchedulerEvent(n3)); @@ -949,14 +949,14 @@ public void 
testBlackListNodes() throws Exception { // Get the allocation for the application and verify no allocation on // blacklist node Allocation allocation1 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation1", 0, allocation1.getContainers().size()); // verify host_1_1 can get allocated as not in blacklist fs.handle(new NodeUpdateSchedulerEvent(n4)); Allocation allocation2 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation2", 1, allocation2.getContainers().size()); List containerList = allocation2.getContainers(); for (Container container : containerList) { @@ -971,29 +971,29 @@ public void testBlackListNodes() throws Exception { ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1)); fs.allocate(appAttemptId1, ask2, emptyId, - Collections.singletonList("rack0"), null); + Collections.singletonList("rack0"), null, null, null); // verify n1 is not qualified to be allocated fs.handle(new NodeUpdateSchedulerEvent(n1)); Allocation allocation3 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation3", 0, allocation3.getContainers().size()); // verify n2 is not qualified to be allocated fs.handle(new NodeUpdateSchedulerEvent(n2)); Allocation allocation4 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation4", 0, allocation4.getContainers().size()); // verify n3 is not qualified to be allocated fs.handle(new NodeUpdateSchedulerEvent(n3)); Allocation allocation5 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, 
emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation5", 0, allocation5.getContainers().size()); fs.handle(new NodeUpdateSchedulerEvent(n4)); Allocation allocation6 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation6", 1, allocation6.getContainers().size()); containerList = allocation6.getContainers(); @@ -1052,25 +1052,25 @@ public void testHeadroom() throws Exception { List ask1 = new ArrayList(); ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1)); - fs.allocate(appAttemptId1, ask1, emptyId, null, null); + fs.allocate(appAttemptId1, ask1, emptyId, null, null, null, null); // Ask for a 2 GB container for app 2 List ask2 = new ArrayList(); ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(2 * GB, 1), 1)); - fs.allocate(appAttemptId2, ask2, emptyId, null, null); + fs.allocate(appAttemptId2, ask2, emptyId, null, null, null, null); // Trigger container assignment fs.handle(new NodeUpdateSchedulerEvent(n1)); // Get the allocation for the applications and verify headroom Allocation allocation1 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("Allocation headroom", 1 * GB, allocation1 .getResourceLimit().getMemory()); Allocation allocation2 = - fs.allocate(appAttemptId2, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId2, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("Allocation headroom", 1 * GB, allocation2 .getResourceLimit().getMemory());