diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index 16ab55d..919f12d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -435,7 +435,10 @@ public void setAMContainer(boolean isAMContainer) { @Override public void handle(RMContainerEvent event) { - LOG.debug("Processing " + event.getContainerId() + " of type " + event.getType()); + if (LOG.isDebugEnabled()) { + LOG.debug("Processing " + event.getContainerId() + " of type " + event + .getType()); + } try { writeLock.lock(); RMContainerState oldState = getState(); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index 6c4f300..e219494 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -154,11 +154,13 @@ public synchronized void allocateContainer(RMContainer rmContainer) { launchedContainers.put(container.getId(), rmContainer); - LOG.info("Assigned container " + container.getId() + " of capacity " - + container.getResource() + " on host " + rmNode.getNodeAddress() - + ", which has " + numContainers + " containers, " - + getAllocatedResource() + " used and " + getUnallocatedResource() - + " available after allocation"); + if (LOG.isDebugEnabled()) { + LOG.debug("Assigned container " + container.getId() + " of capacity " + + container.getResource() + " on host " + rmNode.getNodeAddress() + + ", which has " + numContainers + " containers, " + + getAllocatedResource() + " used and " + getUnallocatedResource() + + " available after allocation"); + } } /** @@ -175,11 +177,13 @@ protected synchronized void changeContainerResource(ContainerId containerId, addUnallocatedResource(deltaResource); } - LOG.info((increase ? "Increased" : "Decreased") + " container " - + containerId + " of capacity " + deltaResource + " on host " - + rmNode.getNodeAddress() + ", which has " + numContainers - + " containers, " + getAllocatedResource() + " used and " - + getUnallocatedResource() + " available after allocation"); + if (LOG.isDebugEnabled()) { + LOG.debug((increase ? "Increased" : "Decreased") + " container " + + containerId + " of capacity " + deltaResource + " on host " + + rmNode.getNodeAddress() + ", which has " + numContainers + + " containers, " + getAllocatedResource() + " used and " + + getUnallocatedResource() + " available after allocation"); + } } /** @@ -261,11 +265,13 @@ public synchronized void releaseContainer(Container container) { updateResource(container); } - LOG.info("Released container " + container.getId() + " of capacity " - + container.getResource() + " on host " + rmNode.getNodeAddress() - + ", which currently has " + numContainers + " containers, " - + getAllocatedResource() + " used and " + getUnallocatedResource() - + " available" + ", release resources=" + true); + if (LOG.isDebugEnabled()) { + LOG.debug("Released container " + container.getId() + " of capacity " + + container.getResource() + " on host " + rmNode.getNodeAddress() + + ", which currently has " + numContainers + " containers, " + + getAllocatedResource() + " used and " + getUnallocatedResource() + + " available" + ", release resources=" + true); + } } /** diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index e426da6..ebd7750 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -133,14 +133,16 @@ synchronized public void containerCompleted(RMContainer rmContainer, // Inform the container rmContainer.handle( - new RMContainerFinishedEvent( - containerId, - containerStatus, - event) - ); - LOG.info("Completed container: " + rmContainer.getContainerId() + - " in state: " + rmContainer.getState() + " event:" + event); - + new RMContainerFinishedEvent( + containerId, + containerStatus, + event) + ); + if (LOG.isDebugEnabled()) { + LOG.debug("Completed container: " + rmContainer.getContainerId() + + " in state: " + rmContainer.getState() + " event:" + event); + } + // Remove from the list of containers liveContainers.remove(rmContainer.getContainerId()); @@ -152,6 +154,7 @@ synchronized public void containerCompleted(RMContainer rmContainer, Resource containerResource = rmContainer.getContainer().getResource(); queue.getMetrics().releaseResources(getUser(), 1, containerResource); this.attemptResourceUsage.decUsed(containerResource); + ((FSQueue)queue).decrementResourceUsage(container.getResource()); // remove from preemption map if it is completed preemptionMap.remove(rmContainer); @@ -385,6 +388,7 @@ else if (allowed.equals(NodeType.RACK_LOCAL) && List resourceRequestList = appSchedulingInfo.allocate( type, node, priority, request, container); this.attemptResourceUsage.incUsed(container.getResource()); + ((FSQueue)queue).incrementResourceUsage(container.getResource()); // Update resource requests related to "request" and store in RMContainer ((RMContainerImpl) rmContainer).setResourceRequests(resourceRequestList); @@ -406,6 +410,17 @@ else if (allowed.equals(NodeType.RACK_LOCAL) && return rmContainer; } + @Override + public synchronized void recoverContainer(SchedulerNode node, + RMContainer rmContainer) { + super.recoverContainer(node, rmContainer); + if (!rmContainer.getState().equals(org.apache.hadoop.yarn.server + .resourcemanager.rmcontainer.RMContainerState.COMPLETED)) { + ((FSQueue)queue).incrementResourceUsage(rmContainer.getContainer() + .getResource()); + } + } + /** * Should be called when the scheduler assigns a container at a higher * degree of locality than the current threshold. Reset the allowed locality @@ -424,6 +439,8 @@ public void addPreemption(RMContainer container, long time) { assert preemptionMap.get(container) == null; preemptionMap.put(container, time); Resources.addTo(preemptedResources, container.getAllocatedResource()); + // Preempted resources should bring down resource usage. + ((FSQueue)queue).decrementResourceUsage(container.getAllocatedResource()); } public Long getContainerPreemptionTime(RMContainer container) { @@ -444,10 +461,12 @@ public Resource getPreemptedResources() { } public void resetPreemptedResources() { + ((FSQueue)queue).incrementResourceUsage(preemptedResources); preemptedResources = Resources.createResource(0); for (RMContainer container : getPreemptionContainers()) { Resources.addTo(preemptedResources, container.getAllocatedResource()); } + ((FSQueue)queue).decrementResourceUsage(preemptedResources); } public void clearPreemptedResources() { diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index ca5a146..5b7827a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -239,23 +239,6 @@ public Resource getDemand() { return demand; } - @Override - public Resource getResourceUsage() { - Resource usage = Resources.createResource(0); - readLock.lock(); - try { - for (FSAppAttempt app : runnableApps) { - Resources.addTo(usage, app.getResourceUsage()); - } - for (FSAppAttempt app : nonRunnableApps) { - Resources.addTo(usage, app.getResourceUsage()); - } - } finally { - readLock.unlock(); - } - return usage; - } - public Resource getAmResourceUsage() { return amResourceUsage; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java index d9fac90..6459ae0 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java @@ -135,20 +135,6 @@ public Resource getDemand() { } @Override - public Resource getResourceUsage() { - Resource usage = Resources.createResource(0); - readLock.lock(); - try { - for (FSQueue child : childQueues) { - Resources.addTo(usage, child.getResourceUsage()); - } - } finally { - readLock.unlock(); - } - return usage; - } - - @Override public void updateDemand() { // Compute demand by iterating through apps in the queue // Limit demand to maxResources diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java index a33084f..b2b59b2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java @@ -62,6 +62,7 @@ private long fairSharePreemptionTimeout = Long.MAX_VALUE; private long minSharePreemptionTimeout = Long.MAX_VALUE; private float fairSharePreemptionThreshold = 0.5f; + private final Resource usage = Resource.newInstance(0, 0); private boolean preemptable = true; public FSQueue(String name, FairScheduler scheduler, FSParentQueue parent) { @@ -72,7 +73,26 @@ public FSQueue(String name, FairScheduler scheduler, FSParentQueue parent) { metrics.setMaxShare(getMaxShare()); this.parent = parent; } - + + @Override + public Resource getResourceUsage() { + return usage; + } + + public synchronized void incrementResourceUsage(Resource res) { + Resources.addTo(this.usage, res); + if (parent != null) { + parent.incrementResourceUsage(res); + } + } + + public synchronized void decrementResourceUsage(Resource res) { + Resources.subtractFrom(this.usage, res); + if (parent != null) { + parent.decrementResourceUsage(res); + } + } + public String getName() { return name; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 3df0600..b3a95bd 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -494,9 +494,11 @@ protected void warnOrKillContainer(RMContainer container) { // TODO: Not sure if this ever actually adds this to the list of cleanup // containers on the RMNode (see SchedulerNode.releaseContainer()). super.completedContainer(container, status, RMContainerEventType.KILL); - LOG.info("Killing container" + container + - " (after waiting for preemption for " + - (getClock().getTime() - time) + "ms)"); + if (LOG.isDebugEnabled()) { + LOG.debug("Killing container" + container + + " (after waiting for preemption for " + + (getClock().getTime() - time) + "ms)"); + } } } else { // track the request in the FSAppAttempt itself @@ -722,8 +724,8 @@ protected synchronized void addApplicationAttempt( } } else { rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptEvent(applicationAttemptId, - RMAppAttemptEventType.ATTEMPT_ADDED)); + new RMAppAttemptEvent(applicationAttemptId, + RMAppAttemptEventType.ATTEMPT_ADDED)); } } @@ -869,9 +871,11 @@ protected synchronized void completedContainerInternal( updateRootQueueMetrics(); } - LOG.info("Application attempt " + application.getApplicationAttemptId() - + " released container " + container.getId() + " on node: " + node - + " with event: " + event); + if (LOG.isDebugEnabled()) { + LOG.debug("Application attempt " + application.getApplicationAttemptId() + + " released container " + container.getId() + " on node: " + node + + " with event: " + event); + } } private synchronized void addNode(List containerReports,