commit 3354a3174c368d2c82e17176a0f094f485322b57 Author: Eric Yang Date: Mon Apr 15 18:24:40 2019 -0400 YARN-9486. Clean up failed Docker containers. Contributed by Eric Yang diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 8516543..e878626 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.WindowsSecureContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; @@ -85,6 +86,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.deletion.task.DockerContainerDeletionTask; +import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerPrepareContext; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java index 226b53d..f165605 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerRelaunch.java @@ -87,7 +87,14 @@ public Integer call() { Path nmPrivateTruststorePath = (container.getCredentials().getSecretKey( AMSecretKeys.YARN_APPLICATION_AM_TRUSTSTORE) == null) ? null : getNmPrivateTruststorePath(appIdStr, containerIdStr); - pidFilePath = getPidFilePath(appIdStr, containerIdStr); + try { + // try to locate existing pid file. + pidFilePath = getPidFilePath(appIdStr, containerIdStr); + } catch (IOException e) { + // reset pid file path if it did not exist. 
+ String pidFileSubpath = getPidFileSubpath(appIdStr, containerIdStr); + pidFilePath = dirsHandler.getLocalPathForWrite(pidFileSubpath); + } LOG.info("Relaunch container with " + "workDir = " + containerWorkDir.toString() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerCleanup.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerCleanup.java index 6c99379..17574df 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerCleanup.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerCleanup.java @@ -79,6 +79,7 @@ public void setup() throws Exception { launch = mock(ContainerLaunch.class); launch.containerAlreadyLaunched = new AtomicBoolean(false); + launch.completed = new AtomicBoolean(false); launch.pidFilePath = new Path("target/" + containerId.toString() + ".pid"); when(launch.getContainerPid()).thenReturn(containerId.toString()); @@ -105,4 +106,16 @@ public void testCleanup() throws Exception { Assert.assertEquals("signal", ContainerExecutor.Signal.TERM, captor.getValue().getSignal()); } + + @Test + public void testFailedExitCleanup() throws Exception { + launch.completed.set(true); + cleanup.run(); + ArgumentCaptor<ContainerSignalContext> captor = + ArgumentCaptor.forClass(ContainerSignalContext.class); + + verify(executor, Mockito.times(1)).signalContainer(captor.capture()); + Assert.assertEquals("signal", ContainerExecutor.Signal.TERM, + captor.getValue().getSignal()); + } }