From 41bc5b3a6a05c7bdad421f4fcf3b58ef612288ed Mon Sep 17 00:00:00 2001
From: SuperbDong <>
Date: Sun, 16 Dec 2018 16:40:11 +0800
Subject: [PATCH] recovery container exit code not right

---
 .../RecoverPausedContainerLaunch.java         |  12 ++
 .../launcher/RecoveredContainerLaunch.java    |  11 ++
 .../TestRecoveredContainerLaunch.java         | 115 ++++++++++++++++++
 3 files changed, 138 insertions(+)
 create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestRecoveredContainerLaunch.java

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java
index 761fe3b11e12..9060100b551e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoverPausedContainerLaunch.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
@@ -103,6 +104,17 @@ public Integer call() {
       }
     }
 
+    if (retCode == ExitCode.FORCE_KILLED.getExitCode()
+        || retCode == ExitCode.TERMINATED.getExitCode()) {
+      // If the process was killed, send CONTAINER_KILLED_ON_REQUEST and
+      // return before the generic non-zero exit code handling below.
+      this.dispatcher.getEventHandler().handle(
+          new ContainerExitEvent(containerId,
+              ContainerEventType.CONTAINER_KILLED_ON_REQUEST, retCode,
+              "Container exited with a non-zero exit code " + retCode));
+      return retCode;
+    }
+
     if (retCode != 0) {
       LOG.warn("Recovered container exited with a non-zero exit code "
           + retCode);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
index a3ccf00de2b6..6b8da52e4916 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java
@@ -108,6 +108,17 @@ public Integer call() {
       }
     }
 
+    if (retCode == ExitCode.FORCE_KILLED.getExitCode()
+        || retCode == ExitCode.TERMINATED.getExitCode()) {
+      // If the process was killed, send CONTAINER_KILLED_ON_REQUEST and
+      // return before the generic non-zero exit code handling below.
+      this.dispatcher.getEventHandler().handle(
+          new ContainerExitEvent(containerId,
+              ContainerEventType.CONTAINER_KILLED_ON_REQUEST, retCode,
+              "Container exited with a non-zero exit code " + retCode));
+      return retCode;
+    }
+
     if (retCode != 0) {
       LOG.warn("Recovered container exited with a non-zero exit code "
           + retCode);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestRecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestRecoveredContainerLaunch.java
new file mode 100644
index 000000000000..43a855b32ec8
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestRecoveredContainerLaunch.java
@@ -0,0 +1,115 @@
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Collections;
+import java.util.List;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.Event;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerExitEvent;
+import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
+import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
+import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
+import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
+import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin;
+import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests to verify all the Recovered Container's Launcher Events in
+ * {@link RecoveredContainerLaunch} are handled as expected.
+ */
+public class TestRecoveredContainerLaunch extends BaseContainerManagerTest {
+
+  public TestRecoveredContainerLaunch() throws UnsupportedFileSystemException {
+    super();
+  }
+
+  @Before
+  public void setup() throws IOException {
+    conf.setClass(
+        YarnConfiguration.NM_MON_RESOURCE_CALCULATOR,
+        LinuxResourceCalculatorPlugin.class, ResourceCalculatorPlugin.class);
+    super.setup();
+    context = new NMContext(new NMContainerTokenSecretManager(
+        conf), new NMTokenSecretManagerInNM(), dirsHandler,
+        new ApplicationACLsManager(conf), new NMNullStateStoreService(), false,
+        conf) {
+      public int getHttpPort() {
+        return HTTP_PORT;
+      }
+      @Override
+      public ContainerExecutor getContainerExecutor() {
+        return exec;
+      }
+    };
+  }
+
+  /**
+   * Seeds a pid file and its ".exitcode" companion for a mocked container and
+   * checks that {@link RecoveredContainerLaunch#call()} returns the exit code
+   * written to that file.
+   */
+  @SuppressWarnings("rawtypes")
+  @Test(timeout = 10000)
+  public void testRecoveryContainerExitCode() throws IOException, YarnException {
+    Container container = mock(Container.class);
+    when(container.getContainerId()).thenReturn(ContainerId.newContainerId(
+        ApplicationAttemptId.newInstance(ApplicationId.newInstance(
+            System.currentTimeMillis(), 1), 1), 1));
+    ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
+    when(clc.getCommands()).thenReturn(Collections.<String>emptyList());
+    when(container.getLaunchContext()).thenReturn(clc);
+    when(container.getLocalizedResources())
+        .thenReturn(Collections.<Path, List<String>> emptyMap());
+    Dispatcher dispatcher = mock(Dispatcher.class);
+    EventHandler eventHandler = new EventHandler() {
+      @Override
+      public void handle(Event event) {
+        // NOTE(review): exit events are received here but never asserted on.
+        if(event instanceof ContainerExitEvent){
+          ContainerExitEvent exitEvent = (ContainerExitEvent) event;
+        }
+      }
+    };
+    when(dispatcher.getEventHandler()).thenReturn(eventHandler);
+    RecoveredContainerLaunch launch = new RecoveredContainerLaunch(context, new Configuration(),
+        dispatcher, exec, null, container, dirsHandler, containerManager);
+    String pidFileSubpath = launch
+        .getPidFileSubpath(container.getContainerId().getApplicationAttemptId().getApplicationId().toString(),
+            container.getContainerId().toString());
+    File pidFile = new File(this.dirsHandler.getLocalDirsForRead().get(0) + "/" + pidFileSubpath);
+    File exitCodeFile = new File(this.dirsHandler.getLocalDirsForRead().get(0) + "/" + pidFileSubpath+".exitcode");
+    new File(pidFile.getParent()).mkdirs();
+    // Close both writers even if creating the second one fails.
+    try (PrintWriter fileWriter = new PrintWriter(pidFile);
+        PrintWriter exitCodeFileWriter = new PrintWriter(exitCodeFile)) {
+      fileWriter.println("2");
+      exitCodeFileWriter.println(ContainerExitStatus.KILLED_EXCEEDED_PMEM);
+    }
+    Integer exitCode = launch.call();
+    // JUnit's assertEquals takes (expected, actual).
+    Assert.assertEquals(Integer.valueOf(ContainerExitStatus.KILLED_EXCEEDED_PMEM), exitCode);
+  }
+}