diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java index 19be39f..ccba35f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedAppsEvent.java @@ -27,13 +27,25 @@ public class CMgrCompletedAppsEvent extends ContainerManagerEvent { private final List appsToCleanup; + private final Reason reason; - public CMgrCompletedAppsEvent(List appsToCleanup) { + public CMgrCompletedAppsEvent(List appsToCleanup, Reason reason) { super(ContainerManagerEventType.FINISH_APPS); this.appsToCleanup = appsToCleanup; + this.reason = reason; } public List getAppsToCleanup() { return this.appsToCleanup; } + + /** Returns the reason passed to the constructor. */ + public Reason getReason() { + return reason; + } + + /** What triggered the cleanup: the NM's own shutdown/resync path, or the RM. */ + public static enum Reason { + ON_SHUTDOWN, BY_RESOURCEMANAGER + } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java index 675b605..807918d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java +++
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java @@ -25,23 +25,13 @@ public class CMgrCompletedContainersEvent extends ContainerManagerEvent { private List containerToCleanup; - private Reason reason; - public CMgrCompletedContainersEvent(List containersToCleanup, Reason reason) { + public CMgrCompletedContainersEvent(List containersToCleanup) { super(ContainerManagerEventType.FINISH_CONTAINERS); this.containerToCleanup = containersToCleanup; - this.reason = reason; } public List getContainersToCleanup() { return this.containerToCleanup; } - - public Reason getReason() { - return reason; - } - - public static enum Reason { - ON_SHUTDOWN, BY_RESOURCEMANAGER - } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index e287add..79b9d7a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -19,9 +19,6 @@ package org.apache.hadoop.yarn.server.nodemanager; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; @@ -67,11 +64,6 @@ * Priority of the NodeManager shutdown hook. */ public static final int SHUTDOWN_HOOK_PRIORITY = 30; - - /** - * Extra duration to wait for containers to be killed on shutdown. 
- */ - private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000; private static final Log LOG = LogFactory.getLog(NodeManager.class); protected final NodeManagerMetrics metrics = NodeManagerMetrics.create(); @@ -84,8 +76,6 @@ private NodeStatusUpdater nodeStatusUpdater; private static CompositeServiceShutdownHook nodeManagerShutdownHook; - private long waitForContainersOnShutdownMillis; - private AtomicBoolean isStopping = new AtomicBoolean(false); public NodeManager() { @@ -193,13 +183,6 @@ protected void serviceInit(Configuration conf) throws Exception { // so that we make sure everything is up before registering with RM. addService(nodeStatusUpdater); - waitForContainersOnShutdownMillis = - conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, - YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) + - conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS, - YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) + - SHUTDOWN_CLEANUP_SLOP_MS; - super.serviceInit(conf); // TODO add local dirs to del } @@ -219,9 +202,6 @@ protected void serviceStop() throws Exception { if (isStopping.getAndSet(true)) { return; } - if (context != null) { - cleanupContainers(NodeManagerEventType.SHUTDOWN); - } super.serviceStop(); DefaultMetricsSystem.shutdown(); } @@ -246,68 +226,12 @@ protected void resyncWithRM() { public void run() { LOG.info("Notifying ContainerManager to block new container-requests"); containerManager.setBlockNewContainerRequests(true); - cleanupContainers(NodeManagerEventType.RESYNC); + containerManager.cleanUpApplications(NodeManagerEventType.RESYNC); ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); } }.start(); } - @SuppressWarnings("unchecked") - protected void cleanupContainers(NodeManagerEventType eventType) { - Map containers = context.getContainers(); - if (containers.isEmpty()) { - return; - } - LOG.info("Containers still running on " + eventType + " : " - + containers.keySet()); - - List containerIds = - new 
ArrayList(containers.keySet()); - dispatcher.getEventHandler().handle( - new CMgrCompletedContainersEvent(containerIds, - CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN)); - - LOG.info("Waiting for containers to be killed"); - - switch (eventType) { - case SHUTDOWN: - long waitStartTime = System.currentTimeMillis(); - while (!containers.isEmpty() - && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { - try { - //To remove done containers in NM context - nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); - Thread.sleep(1000); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on container kill on shutdown", - ex); - } - } - break; - case RESYNC: - while (!containers.isEmpty()) { - try { - Thread.sleep(1000); - nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on container kill on resync", - ex); - } - } - break; - default: - LOG.warn("Invalid eventType: " + eventType); - } - - // All containers killed - if (containers.isEmpty()) { - LOG.info("All containers in DONE state"); - } else { - LOG.info("Done waiting for containers to be killed. 
Still alive: " + - containers.keySet()); - } - } - public static class NMContext implements Context { private NodeId nodeId = null; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 05d9fea..f2c6e00 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -471,8 +471,7 @@ public void run() { .getContainersToCleanup(); if (containersToCleanup.size() != 0) { dispatcher.getEventHandler().handle( - new CMgrCompletedContainersEvent(containersToCleanup, - CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER)); + new CMgrCompletedContainersEvent(containersToCleanup)); } List appsToCleanup = response.getApplicationsToCleanup(); @@ -480,7 +479,8 @@ public void run() { trackAppsForKeepAlive(appsToCleanup); if (appsToCleanup.size() != 0) { dispatcher.getEventHandler().handle( - new CMgrCompletedAppsEvent(appsToCleanup)); + new CMgrCompletedAppsEvent(appsToCleanup, + CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER)); } } catch (ConnectException e) { //catch and throw the exception if tried MAX wait time to connect RM diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 
0af4332..bdc8f98 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -82,6 +82,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; +import org.apache.hadoop.yarn.server.nodemanager.NodeManagerEventType; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; @@ -119,6 +120,11 @@ ServiceStateChangeListener, ContainerManagementProtocol, EventHandler { + /** + * Extra duration to wait for applications to be killed on shutdown. 
+ */ + private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000; + private static final Log LOG = LogFactory.getLog(ContainerManagerImpl.class); final Context context; @@ -138,6 +144,8 @@ private final DeletionService deletionService; private AtomicBoolean blockNewContainerRequests = new AtomicBoolean(false); + private long waitForContainersOnShutdownMillis; + public ContainerManagerImpl(Context context, ContainerExecutor exec, DeletionService deletionContext, NodeStatusUpdater nodeStatusUpdater, NodeManagerMetrics metrics, ApplicationACLsManager aclsManager, @@ -189,6 +197,13 @@ public void serviceInit(Configuration conf) throws Exception { addIfService(logHandler); dispatcher.register(LogHandlerEventType.class, logHandler); + waitForContainersOnShutdownMillis = + conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, + YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) + + conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS, + YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) + + SHUTDOWN_CLEANUP_SLOP_MS; + super.serviceInit(conf); } @@ -274,6 +289,9 @@ void refreshServiceAcls(Configuration configuration, @Override public void serviceStop() throws Exception { + if (context != null) { + cleanUpApplications(NodeManagerEventType.SHUTDOWN); + } if (auxiliaryServices.getServiceState() == STARTED) { auxiliaryServices.unregisterServiceListener(this); } @@ -283,6 +301,72 @@ public void serviceStop() throws Exception { super.serviceStop(); } + /** + * Sends a finish request for every application in the NM context and then + * waits for the applications map to empty: up to the configured kill wait + * on SHUTDOWN, without a time limit on RESYNC. An interrupt received while + * waiting is restored on the calling thread before returning. + */ + public void cleanUpApplications(NodeManagerEventType eventType) { + Map applications = + this.context.getApplications(); + if (applications.isEmpty()) { + return; + } + LOG.info("Applications still running : " + applications.keySet()); + + List appIds = + new ArrayList(applications.keySet()); + this.handle( + new CMgrCompletedAppsEvent(appIds, + CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN)); + + LOG.info("Waiting for Applications to be Finished"); + + // Remember an interrupt rather than swallowing it; it is restored on exit. + boolean interrupted = false; + switch (eventType) { + case SHUTDOWN: + long waitStartTime =
System.currentTimeMillis(); + while (!applications.isEmpty() + && System.currentTimeMillis() - waitStartTime + < waitForContainersOnShutdownMillis) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on applications finish on shutdown", + ex); + interrupted = true; + } + } + break; + case RESYNC: + while (!applications.isEmpty()) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on applications finish on resync", + ex); + interrupted = true; + } + } + break; + default: + LOG.warn("Invalid eventType: " + eventType); + } + + // All applications Finished + if (applications.isEmpty()) { + LOG.info("All applications in FINISHED state"); + } else { + LOG.info("Done waiting for Applications to be Finished. Still alive: " + + applications.keySet()); + } + if (interrupted) { + Thread.currentThread().interrupt(); + } + } + // Get the remoteUGI corresponding to the api call. protected UserGroupInformation getRemoteUgi() throws YarnException { @@ -713,9 +797,15 @@ public void handle(ContainerManagerEvent event) { CMgrCompletedAppsEvent appsFinishedEvent = (CMgrCompletedAppsEvent) event; for (ApplicationId appID : appsFinishedEvent.getAppsToCleanup()) { + String diagnostic = ""; + if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) { + diagnostic = "Application killed on SHUTDOWN"; + } else if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) { + diagnostic = "Application killed by ResourceManager"; + } this.dispatcher.getEventHandler().handle( new ApplicationFinishEvent(appID, - "Application Killed by ResourceManager")); + diagnostic)); } break; case FINISH_CONTAINERS: @@ -723,16 +813,9 @@ public void handle(ContainerManagerEvent event) { (CMgrCompletedContainersEvent) event; for (ContainerId container : containersFinishedEvent .getContainersToCleanup()) { - String diagnostic = ""; - if (containersFinishedEvent.getReason() == - CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) { - diagnostic = "Container
Killed on Shutdown"; - } else if (containersFinishedEvent.getReason() == - CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) { - diagnostic = "Container Killed by ResourceManager"; - } - this.dispatcher.getEventHandler().handle( - new ContainerKillEvent(container, diagnostic)); + this.dispatcher.getEventHandler().handle( + new ContainerKillEvent(container, + "Container Killed by ResourceManager")); } break; default: diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java index edf6359..a901c20 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java @@ -177,6 +177,9 @@ public ApplicationState getApplicationState() { ApplicationState.APPLICATION_RESOURCES_CLEANINGUP), ApplicationEventType.APPLICATION_CONTAINER_FINISHED, new AppFinishTransition()) + .addTransition(ApplicationState.FINISHING_CONTAINERS_WAIT, + ApplicationState.FINISHING_CONTAINERS_WAIT, + ApplicationEventType.FINISH_APPLICATION) // Transitions from APPLICATION_RESOURCES_CLEANINGUP state .addTransition(ApplicationState.APPLICATION_RESOURCES_CLEANINGUP, @@ -186,12 +189,17 @@ public ApplicationState getApplicationState() { ApplicationState.FINISHED, ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP, new AppCompletelyDoneTransition()) + .addTransition(ApplicationState.APPLICATION_RESOURCES_CLEANINGUP, + 
ApplicationState.APPLICATION_RESOURCES_CLEANINGUP, + ApplicationEventType.FINISH_APPLICATION) // Transitions from FINISHED state .addTransition(ApplicationState.FINISHED, ApplicationState.FINISHED, ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED, new AppLogsAggregatedTransition()) + .addTransition(ApplicationState.FINISHED, ApplicationState.FINISHED, + ApplicationEventType.FINISH_APPLICATION) // create the topology tables .installTopology(); @@ -343,7 +351,7 @@ void handleAppFinishWithContainersCleanedup() { @Override public ApplicationState transition(ApplicationImpl app, ApplicationEvent event) { - + ApplicationFinishEvent appEvent = (ApplicationFinishEvent)event; if (app.containers.isEmpty()) { // No container to cleanup. Cleanup app level resources. app.handleAppFinishWithContainersCleanedup(); @@ -355,7 +363,7 @@ public ApplicationState transition(ApplicationImpl app, for (ContainerId containerID : app.containers.keySet()) { app.dispatcher.getEventHandler().handle( new ContainerKillEvent(containerID, - "Container killed on application-finish event from RM.")); + "Container killed on application-finish event: " + appEvent.getDiagnostic())); } return ApplicationState.FINISHING_CONTAINERS_WAIT; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index fab9e01..e69170e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -288,6 +288,7 @@ private YarnConfiguration createNMConfig() { 
conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "127.0.0.1:12346"); conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index a05e341..3e0846b 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -143,6 +143,7 @@ private YarnConfiguration createNMConfig() { conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index 6fcb1e0..46f7a87 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -240,6 +240,7 @@ private YarnConfiguration createNMConfig() { conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 3fc5a2d..86d2a11 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -452,13 +452,13 @@ protected NodeStatusUpdater createNodeStatusUpdater(Context context, @Override protected void serviceStop() throws Exception { + // Run the parent stop first; the check below inspects what is left afterwards. super.serviceStop(); isStopped = true; - ConcurrentMap containers = - getNMContext().getContainers(); - // ensure that containers are empty - if(!containers.isEmpty()) { + ConcurrentMap applications = + getNMContext().getApplications(); + // ensure that applications are empty + if(!applications.isEmpty()) { assertionFailedInThread.set(true); } syncBarrier.await(10000, TimeUnit.MILLISECONDS); @@ -855,9 +855,20 @@ protected NodeStatusUpdater createNodeStatusUpdater(Context context, } @Override - protected void cleanupContainers(NodeManagerEventType eventType) { -
super.cleanupContainers(NodeManagerEventType.SHUTDOWN); - numCleanups.incrementAndGet(); + protected ContainerManagerImpl createContainerManager(Context context, + ContainerExecutor exec, DeletionService del, + NodeStatusUpdater nodeStatusUpdater, + ApplicationACLsManager aclsManager, + LocalDirsHandlerService dirsHandler) { + return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, + metrics, aclsManager, dirsHandler) { + + @Override + public void cleanUpApplications(NodeManagerEventType eventType) { + super.cleanUpApplications(NodeManagerEventType.SHUTDOWN); + numCleanups.incrementAndGet(); + } + }; } }; @@ -1157,6 +1168,7 @@ public void testNodeStatusUpdaterRetryAndNMShutdown() .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs); conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); CyclicBarrier syncBarrier = new CyclicBarrier(2); nm = new MyNodeManager2(syncBarrier, conf); nm.init(conf); @@ -1297,6 +1309,7 @@ private YarnConfiguration createNMConfig() { conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index b02054c..4f23427 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -166,6 +166,7 @@ public void setup() throws IOException { conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); // Default delSrvc delSrvc = createDeletionService(); delSrvc.init(conf); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index e5b318e..ece695f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -538,7 +538,7 @@ public void testLocalFilesCleanup() throws InterruptedException, // Simulate RM sending an AppFinish event. 
containerManager.handle(new CMgrCompletedAppsEvent(Arrays - .asList(new ApplicationId[] { appId }))); + .asList(new ApplicationId[] { appId }), CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER)); BaseContainerManagerTest.waitForApplicationState(containerManager, cId.getApplicationAttemptId().getApplicationId(), diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java index 429ad45..5914a3e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/TestApplication.java @@ -559,8 +559,8 @@ public void applicationInited() { } public void appFinished() { - app.handle(new ApplicationEvent(appId, - ApplicationEventType.FINISH_APPLICATION)); + app.handle(new ApplicationFinishEvent(appId, + "Finish Application")); drainDispatcherEvents(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 5179f3f..bfb0e87 100644 ---
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -826,7 +826,7 @@ public void testLogAggregationForRealContainerLaunch() throws IOException, cId, ContainerState.COMPLETE); this.containerManager.handle(new CMgrCompletedAppsEvent(Arrays - .asList(appId))); + .asList(appId), CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN)); this.containerManager.stop(); }