diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 1bff008..edc3146 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -68,6 +68,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.nodemanager.util.ProcessIdFileReader; @@ -133,10 +134,22 @@ public Integer call() { final List command = launchContext.getCommands(); int ret = -1; + // CONTAINER_KILLED_ON_REQUEST should not be missed if the container + // is already at KILLING + if (container.getContainerState() == ContainerState.KILLING) { + dispatcher.getEventHandler().handle( + new ContainerExitEvent(containerID, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(), + "Container terminated before launch.")); + return 0; + } + try { localResources = container.getLocalizedResources(); if (localResources == null) { - RPCUtil.getRemoteException( + throw RPCUtil.getRemoteException( "Unable to get local resources when Container " + containerID + " is at " + container.getContainerState()); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java index 33e3c1c..ce865e3 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java @@ -75,20 +75,9 @@ new ThreadFactoryBuilder() .setNameFormat("ContainersLauncher #%d") .build()); - private final Map running = - Collections.synchronizedMap(new HashMap()); - - private static final class RunningContainer { - public RunningContainer(Future submit, - ContainerLaunch launcher) { - this.runningcontainer = submit; - this.launcher = launcher; - } - - Future runningcontainer; - ContainerLaunch launcher; - } - + @VisibleForTesting + public final Map running = + Collections.synchronizedMap(new HashMap()); public ContainersLauncher(Context context, Dispatcher dispatcher, ContainerExecutor exec, LocalDirsHandlerService dirsHandler, @@ -133,38 +122,20 @@ public void handle(ContainersLauncherEvent event) { ContainerLaunch launch = new ContainerLaunch(context, getConfig(), dispatcher, exec, app, event.getContainer(), dirsHandler, containerManager); - running.put(containerId, - new RunningContainer(containerLauncher.submit(launch), - launch)); + containerLauncher.submit(launch); + running.put(containerId, launch); break; case CLEANUP_CONTAINER: - RunningContainer rContainerDatum = running.remove(containerId); - if (rContainerDatum == null) { + ContainerLaunch launcher = running.remove(containerId); + if (launcher == null) { // Container not launched. So nothing needs to be done. return; } - Future rContainer = rContainerDatum.runningcontainer; - if (rContainer != null - && !rContainer.isDone()) { - // Cancel the future so that it won't be launched if it isn't already. - // If it is going to be canceled, make sure CONTAINER_KILLED_ON_REQUEST - // will not be missed if the container is already at KILLING - if (rContainer.cancel(false)) { - if (container.getContainerState() == ContainerState.KILLING) { - dispatcher.getEventHandler().handle( - new ContainerExitEvent(containerId, - ContainerEventType.CONTAINER_KILLED_ON_REQUEST, - Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(), - "Container terminated before launch.")); - } - } - } // Cleanup a container whether it is running/killed/completed, so that // no sub-processes are alive. try { - rContainerDatum.launcher.cleanupContainer(); + launcher.cleanupContainer(); } catch (IOException e) { LOG.warn("Got exception while cleaning container " + containerId + ". Ignoring."); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 14d445f..ebc400a 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; @@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType; @@ -296,8 +298,7 @@ public void testCleanupOnKillRequest() throws Exception { wc.launchContainer(); reset(wc.localizerBus); wc.killContainer(); - assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, - wc.c.getContainerState()); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); wc.containerKilledOnRequest(); @@ -330,14 +331,18 @@ public void testKillOnLocalizationFailed() throws Exception { } @Test - public void testKillOnLocalized() throws Exception { + public void testKillOnLocalizedWhenContainerNotLaunched() throws Exception { WrappedContainer wc = null; try { wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); wc.initContainer(); wc.localizeResources(); assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + launcher.call(); + wc.drainDispatcherEvents(); assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); @@ -348,6 +353,31 @@ public void testKillOnLocalized() throws Exception { } } } + + @Test + public void testKillOnLocalizedWhenContainerLaunched() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + ContainerLaunch launcher = wc.launcher.running.get(wc.c.getContainerId()); + launcher.call(); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + wc.killContainer(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test public void testResourceLocalizedOnLocalizationFailed() throws Exception { @@ -472,12 +502,10 @@ public void testLaunchAfterKillRequest() throws Exception { wc.initContainer(); wc.localizeResources(); wc.killContainer(); - assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, - wc.c.getContainerState()); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); wc.launchContainer(); - assertEquals(ContainerState.CONTAINER_CLEANEDUP_AFTER_KILL, - wc.c.getContainerState()); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); wc.containerKilledOnRequest(); verifyCleanupCall(wc); @@ -650,7 +678,9 @@ public boolean matches(Object o) { Context context = mock(Context.class); when(context.getApplications()).thenReturn( new ConcurrentHashMap()); - launcher = new ContainersLauncher(context, dispatcher, null, null, null); + ContainerExecutor executor = mock(ContainerExecutor.class); + launcher = + new ContainersLauncher(context, dispatcher, executor, null, null); // create a mock ExecutorService, which will not really launch // ContainerLaunch at all. launcher.containerLauncher = mock(ExecutorService.class); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 0a0a459..6612ea6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -682,9 +682,8 @@ public void testDelayedKill() throws Exception { ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest) .getContainerStatuses().get(0); - int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(); - Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); + Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), + containerStatus.getExitStatus()); // Now verify the contents of the file. Script generates a message when it // receives a sigterm so we look for that. We cannot perform this check on