diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java index b5f8281..16bdf24 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestNMClient.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.TreeSet; @@ -319,7 +320,7 @@ private void testContainerManagement(NMClientImpl nmClient, if (++i < size) { // NodeManager may still need some time to make the container started testGetContainerStatus(container, i, ContainerState.RUNNING, "", - -1000); + Arrays.asList(new Integer[] {-1000})); try { nmClient.stopContainer(container.getId(), container.getNodeId()); @@ -330,8 +331,21 @@ private void testContainerManagement(NMClientImpl nmClient, } // getContainerStatus can be called after stopContainer - testGetContainerStatus(container, i, ContainerState.COMPLETE, - "Container killed by the ApplicationMaster.", 143); + try { + // O is possible if CLEANUP_CONTAINER is executed too late + testGetContainerStatus(container, i, ContainerState.COMPLETE, + "Container killed by the ApplicationMaster.", Arrays.asList( + new Integer[] {143, 0})); + } catch (YarnException e) { + // The exception is possible because, after the container is stopped, + // it may be removed from NM's context. + if (!e.getMessage() + .contains("is not handled by this NodeManager")) { + throw (AssertionError) + (new AssertionError("Exception is not expected: " + e).initCause( + e)); + } + } } } } @@ -345,7 +359,7 @@ private void sleep(int sleepTime) { } private void testGetContainerStatus(Container container, int index, - ContainerState state, String diagnostics, int exitStatus) + ContainerState state, String diagnostics, List exitStatuses) throws YarnException, IOException { while (true) { try { @@ -357,7 +371,7 @@ private void testGetContainerStatus(Container container, int index, assertEquals(container.getId(), status.getContainerId()); assertTrue("" + index + ": " + status.getDiagnostics(), status.getDiagnostics().contains(diagnostics)); - assertEquals(exitStatus, status.getExitStatus()); + assertTrue(exitStatuses.contains(status.getExitStatus())); break; } Thread.sleep(100); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java index 4e374fd..500a854 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainersLauncher.java @@ -37,12 +37,17 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; /** @@ -61,7 +66,8 @@ private final Dispatcher dispatcher; private LocalDirsHandlerService dirsHandler; - private final ExecutorService containerLauncher = + @VisibleForTesting + protected ExecutorService containerLauncher = Executors.newCachedThreadPool( new ThreadFactoryBuilder() .setNameFormat("ContainersLauncher #%d") @@ -107,6 +113,7 @@ protected void serviceStop() throws Exception { super.serviceStop(); } + @SuppressWarnings("unchecked") @Override public void handle(ContainersLauncherEvent event) { // TODO: ContainersLauncher launches containers one by one!! @@ -134,9 +141,18 @@ public void handle(ContainersLauncherEvent event) { Future rContainer = rContainerDatum.runningcontainer; if (rContainer != null && !rContainer.isDone()) { - // Cancel the future so that it won't be launched - // if it isn't already. - rContainer.cancel(false); + // Cancel the future so that it won't be launched if it isn't already. + // If it is going to be canceled, make sure CONTAINER_KILLED_ON_REQUEST + // will not be missed if the container is already at KILLING + if (rContainer.cancel(false)) { + if (container.getContainerState() == ContainerState.KILLING) { + dispatcher.getEventHandler().handle( + new ContainerExitEvent(containerId, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + ExitCode.TERMINATED.getExitCode(), + "Container terminated before launch.")); + } + } } // Cleanup a container whether it is running/killed/completed, so that diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index e2fb917..267479c 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher; import static org.junit.Assert.assertEquals; +import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -34,6 +35,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; import junit.framework.Assert; @@ -68,8 +73,10 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; +import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; @@ -596,8 +603,9 @@ public void testDelayedKill() throws Exception { ContainerStatus containerStatus = containerManager.getContainerStatuses(gcsRequest) .getContainerStatuses().get(0); - Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), - containerStatus.getExitStatus()); + int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(); + Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Now verify the contents of the file. Script generates a message when it // receives a sigterm so we look for that. We cannot perform this check on @@ -653,6 +661,52 @@ public void handle(Event event) { launch.call(); } + @SuppressWarnings({ "unchecked", "rawtypes" }) + @Test + public void testKillBeforeLaunch() throws Exception { + Context context = mock(Context.class); + when(context.getApplications()).thenReturn( + new ConcurrentHashMap()); + // create a mock Dispatcher, which will assert the expected + // CONTAINER_KILLED_ON_REQUEST event will be emitted + Dispatcher dispatcher = mock(Dispatcher.class); + EventHandler eventHandler = new EventHandler() { + public void handle(Event event) { + Assert.assertTrue(event instanceof ContainerExitEvent); + ContainerExitEvent exitEvent = (ContainerExitEvent) event; + Assert.assertEquals(ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + exitEvent.getType()); + } + }; + when(dispatcher.getEventHandler()).thenReturn(eventHandler); + ContainersLauncher launcher = + new ContainersLauncher(context, dispatcher, null, null); + // create a mock ExecutorService, which will not really launch + // ContainerLaunch at all. + launcher.containerLauncher = mock(ExecutorService.class); + Future future = mock(Future.class); + when(launcher.containerLauncher.submit + (any(Callable.class))).thenReturn(future); + when(future.isDone()).thenReturn(false); + when(future.cancel(false)).thenReturn(true); + launcher.init(new Configuration()); + launcher.start(); + Container container = mock(Container.class); + // the container is at KILLING, as stopContainer is executed + when(container.getContainerState()).thenReturn( + org.apache.hadoop.yarn.server.nodemanager.containermanager.container + .ContainerState.KILLING); + when(container.getContainerId()).thenReturn(ContainerId.newInstance( + ApplicationAttemptId.newInstance(ApplicationId.newInstance( + System.currentTimeMillis(), 1), 1), 1)); + launcher.handle(new ContainersLauncherEvent( + container, ContainersLauncherEventType.LAUNCH_CONTAINER)); + launcher.handle(new ContainersLauncherEvent( + container, ContainersLauncherEventType.CLEANUP_CONTAINER)); + launcher.stop(); + launcher.close(); + } + protected Token createContainerToken(ContainerId cId) throws InvalidToken { Resource r = BuilderUtils.newResource(1024, 1); ContainerTokenIdentifier containerTokenIdentifier =