diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 40f051babd6..e5ab0274a15 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -597,29 +597,34 @@ private void updateNMResource(Resource resource) { @VisibleForTesting protected List getContainerStatuses() throws IOException { List containerStatuses = new ArrayList(); - for (Container container : this.context.getContainers().values()) { - ContainerId containerId = container.getContainerId(); - ApplicationId applicationId = containerId.getApplicationAttemptId() - .getApplicationId(); - org.apache.hadoop.yarn.api.records.ContainerStatus containerStatus = - container.cloneAndGetContainerStatus(); - if (containerStatus.getState() == ContainerState.COMPLETE) { - if (isApplicationStopped(applicationId)) { - LOG.debug("{} is completing, remove {} from NM context.", - applicationId, containerId); - context.getContainers().remove(containerId); - pendingCompletedContainers.put(containerId, containerStatus); - } else { - if (!isContainerRecentlyStopped(containerId)) { + // if pendingCompletedContainers is not empty, that means + // nm doesn't get response from rm, resending old containers. + // Otherwise adding new complete containers. + if (pendingCompletedContainers.isEmpty()) { + for (Container container : this.context.getContainers().values()) { + ContainerId containerId = container.getContainerId(); + ApplicationId applicationId = containerId.getApplicationAttemptId() + .getApplicationId(); + org.apache.hadoop.yarn.api.records.ContainerStatus containerStatus = + container.cloneAndGetContainerStatus(); + if (containerStatus.getState() == ContainerState.COMPLETE) { + if (isApplicationStopped(applicationId)) { + LOG.debug("{} is completing, remove {} from NM context.", + applicationId, containerId); + context.getContainers().remove(containerId); pendingCompletedContainers.put(containerId, containerStatus); + } else { + if (!isContainerRecentlyStopped(containerId)) { + pendingCompletedContainers.put(containerId, containerStatus); + } } + // Adding to finished containers cache. Cache will keep it around at + // least for #durationToTrackStoppedContainers duration. In the + // subsequent call to stop container it will get removed from cache. + addCompletedContainer(containerId); + } else { + containerStatuses.add(containerStatus); } - // Adding to finished containers cache. Cache will keep it around at - // least for #durationToTrackStoppedContainers duration. In the - // subsequent call to stop container it will get removed from cache. - addCompletedContainer(containerId); - } else { - containerStatuses.add(containerStatus); } } @@ -722,7 +727,6 @@ public void removeOrTrackCompletedContainersFromContext( LOG.info("Removed completed containers from NM context: " + removedContainers); } - pendingCompletedContainers.clear(); } private void trackAppsForKeepAlive(List appIds) { @@ -1351,6 +1355,11 @@ public void run() { .getContainersToBeRemovedFromNM()); logAggregationReportForAppsTempList.clear(); + // nm has got response from rm, clear pendingCompletedContainers + // to avoid sending them repeatedly. + if (lastHeartbeatID != response.getResponseId()) { + pendingCompletedContainers.clear(); + } lastHeartbeatID = response.getResponseId(); List containersToCleanup = response .getContainersToCleanup();