diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index f4578dc..a2e900e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -90,6 +90,7 @@ private final Context context; private volatile AtomicBoolean shouldLaunchContainer = new AtomicBoolean(false); + private volatile AtomicBoolean started = new AtomicBoolean(false); private volatile AtomicBoolean completed = new AtomicBoolean(false); private long sleepDelayBeforeSigKill = 250; @@ -120,6 +121,7 @@ public ContainerLaunch(Context context, Configuration configuration, @Override @SuppressWarnings("unchecked") // dispatcher not typed public Integer call() { + started.set(true); final ContainerLaunchContext launchContext = container.getLaunchContext(); Map> localResources = null; ContainerId containerID = container.getContainerId(); @@ -130,7 +132,7 @@ public Integer call() { try { localResources = container.getLocalizedResources(); if (localResources == null) { - RPCUtil.getRemoteException( + throw RPCUtil.getRemoteException( "Unable to get local resources when Container " + containerID + " is at " + container.getContainerState()); } @@ -308,6 +310,10 @@ public Integer call() { return 0; } + public boolean isStarted() { + return started.get(); + } + /** * Cleanup the container. * Cancels the launch if launch has not started yet or signals diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java index 03ddb56..f4551f4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java @@ -72,10 +72,12 @@ new ThreadFactoryBuilder() .setNameFormat("ContainersLauncher #%d") .build()); - private final Map running = + @VisibleForTesting + public final Map running = Collections.synchronizedMap(new HashMap()); - private static final class RunningContainer { + @VisibleForTesting + public static final class RunningContainer { public RunningContainer(Future submit, ContainerLaunch launcher) { this.runningcontainer = submit; @@ -83,7 +85,8 @@ public RunningContainer(Future submit, } Future runningcontainer; - ContainerLaunch launcher; + @VisibleForTesting + public ContainerLaunch launcher; } @@ -139,12 +142,14 @@ public void handle(ContainersLauncherEvent event) { return; } Future rContainer = rContainerDatum.runningcontainer; + ContainerLaunch launcher = rContainerDatum.launcher; if (rContainer != null && !rContainer.isDone()) { // Cancel the future so that it won't be launched if it isn't already. - // If it is going to be canceled, make sure CONTAINER_KILLED_ON_REQUEST - // will not be missed if the container is already at KILLING - if (rContainer.cancel(false)) { + // If it is going to be canceled before it is started, make sure + // CONTAINER_KILLED_ON_REQUEST will not be missed if the container is + // already at KILLING + if (rContainer.cancel(false) && !launcher.isStarted()) { if (container.getContainerState() == ContainerState.KILLING) { dispatcher.getEventHandler().handle( new ContainerExitEvent(containerId, @@ -158,7 +163,7 @@ public void handle(ContainersLauncherEvent event) { // Cleanup a container whether it is running/killed/completed, so that // no sub-processes are alive. try { - rContainerDatum.launcher.cleanupContainer(); + launcher.cleanupContainer(); } catch (IOException e) { LOG.warn("Got exception while cleaning container " + containerId + ". Ignoring."); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index fc1408b..0a9676e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -24,6 +24,7 @@ import static org.mockito.Matchers.any; import static org.mockito.Matchers.argThat; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.reset; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -66,6 +67,7 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; @@ -73,6 +75,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher.RunningContainer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest; @@ -330,7 +333,7 @@ public void testKillOnLocalizationFailed() throws Exception { } @Test - public void testKillOnLocalized() throws Exception { + public void testKillOnLocalizedWhenContainerNotLaunched() throws Exception { WrappedContainer wc = null; try { wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); @@ -348,6 +351,33 @@ public void testKillOnLocalized() throws Exception { } } } + + @Test + public void testKillOnLocalizedWhenContainerLaunched() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(17, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + assertEquals(ContainerState.LOCALIZED, wc.c.getContainerState()); + // mock that container is to be launched before canceling + RunningContainer rc = wc.launcher.running.get(wc.c.getContainerId()); + rc.launcher = spy(rc.launcher); + when(rc.launcher.isStarted()).thenReturn(true); + wc.killContainer(); + assertEquals(ContainerState.KILLING, wc.c.getContainerState()); + rc.launcher.call(); + wc.drainDispatcherEvents(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + assertNull(wc.c.getLocalizedResources()); + verifyCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test public void testResourceLocalizedOnLocalizationFailed() throws Exception { @@ -650,7 +680,8 @@ public boolean matches(Object o) { Context context = mock(Context.class); when(context.getApplications()).thenReturn( new ConcurrentHashMap()); - launcher = new ContainersLauncher(context, dispatcher, null, null); + ContainerExecutor executor = mock(ContainerExecutor.class); + launcher = new ContainersLauncher(context, dispatcher, executor, null); // create a mock ExecutorService, which will not really launch // ContainerLaunch at all. launcher.containerLauncher = mock(ExecutorService.class); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 78c78c0..e2fb917 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -596,9 +596,8 @@ public void testDelayedKill() throws Exception { ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest) .getContainerStatuses().get(0); - int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : - ExitCode.TERMINATED.getExitCode(); - Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); + Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), + containerStatus.getExitStatus()); // Now verify the contents of the file. Script generates a message when it // receives a sigterm so we look for that. We cannot perform this check on