diff --git hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java index eacc0bf..18ee182 100644 --- hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java +++ hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java @@ -123,6 +123,56 @@ public static boolean isJava7OrAbove() { : new String[] { "ln", "-s", target, link }; } + /** Return a command for determining if process with specified pid is alive. */ + public static String[] getCheckProcessIsAliveCommand(String pid) { + return Shell.WINDOWS ? + new String[] { Shell.WINUTILS, "task", "isAlive", pid } : + new String[] { "kill", "-0", isSetsidAvailable ? "-" + pid : pid }; + } + + /** Return a command to send a signal to a given pid */ + public static String[] getSignalKillCommand(int code, String pid) { + return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } : + new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid }; + } + + /** + * Returns a File referencing a script with the given basename, inside the + * given parent directory. The file extension is inferred by platform: ".cmd" + * on Windows, or ".sh" otherwise. + * + * @param parent File parent directory + * @param basename String script file basename + * @return File referencing the script in the directory + */ + public static File appendScriptExtension(File parent, String basename) { + return new File(parent, appendScriptExtension(basename)); + } + + /** + * Returns a script file name with the given basename. The file extension is + * inferred by platform: ".cmd" on Windows, or ".sh" otherwise. + * + * @param basename String script file basename + * @return String script file name + */ + public static String appendScriptExtension(String basename) { + return basename + (WINDOWS ? ".cmd" : ".sh"); + } + + /** + * Returns a command to run the given script. The script interpreter is + * inferred by platform: cmd on Windows or bash otherwise. + * + * @param script File script to run + * @return String[] command to run the script + */ + public static String[] getRunScriptCommand(File script) { + String absolutePath = script.getAbsolutePath(); + return WINDOWS ? new String[] { "cmd", "/c", absolutePath } : + new String[] { "/bin/bash", absolutePath }; + } + /** a Unix command to set permission */ public static final String SET_PERMISSION_COMMAND = "chmod"; /** a Unix command to set owner */ @@ -243,6 +293,26 @@ public static final String getWinUtilsPath() { return winUtilsPath; } + public static final boolean isSetsidAvailable = isSetsidSupported(); + private static boolean isSetsidSupported() { + if (Shell.WINDOWS) { + return false; + } + ShellCommandExecutor shexec = null; + boolean setsidSupported = true; + try { + String[] args = {"setsid", "bash", "-c", "echo $$"}; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("setsid is not available on this machine. So not using it."); + setsidSupported = false; + } finally { // handle the exit code + LOG.info("setsid exited with exit code " + shexec.getExitCode()); + } + return setsidSupported; + } + /** Token separator regex used to parse Shell tool outputs */ public static final String TOKEN_SEPARATOR_REGEX = WINDOWS ? "[|\n\r]" : "[ \t\n\r\f]"; diff --git hadoop-common-project/hadoop-common/src/main/winutils/task.c hadoop-common-project/hadoop-common/src/main/winutils/task.c index 5a5345b..b8267ca 100644 --- hadoop-common-project/hadoop-common/src/main/winutils/task.c +++ hadoop-common-project/hadoop-common/src/main/winutils/task.c @@ -24,6 +24,10 @@ #define ERROR_TASK_NOT_ALIVE 1 +// This exit code for killed processes is compatible with Unix, where a killed +// process exits with 128 + signal. For SIGKILL, this would be 128 + 9 = 137. +#define KILLED_PROCESS_EXIT_CODE 137 + // List of different task related command line options supported by // winutils. typedef enum TaskCommandOptionType @@ -264,7 +268,7 @@ DWORD killTask(_TCHAR* jobObjName) return err; } - if(TerminateJobObject(jobObject, 1) == 0) + if(TerminateJobObject(jobObject, KILLED_PROCESS_EXIT_CODE) == 0) { return GetLastError(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index c386101..327a738 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -222,19 +222,6 @@ protected Path getPidFilePath(ContainerId containerId) { } - /** Return a command for determining if process with specified pid is alive. */ - protected static String[] getCheckProcessIsAliveCommand(String pid) { - return Shell.WINDOWS ? - new String[] { Shell.WINUTILS, "task", "isAlive", pid } : - new String[] { "kill", "-0", pid }; - } - - /** Return a command to send a signal to a given pid */ - protected static String[] getSignalKillCommand(int code, String pid) { - return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } : - new String[] { "kill", "-" + code, pid }; - } - /** * Is the container still active? * @param containerId @@ -303,26 +290,6 @@ public String getProcessId(ContainerId containerID) { return pid; } - public static final boolean isSetsidAvailable = isSetsidSupported(); - private static boolean isSetsidSupported() { - if (Shell.WINDOWS) { - return true; - } - ShellCommandExecutor shexec = null; - boolean setsidSupported = true; - try { - String[] args = {"setsid", "bash", "-c", "echo $$"}; - shexec = new ShellCommandExecutor(args); - shexec.execute(); - } catch (IOException ioe) { - LOG.warn("setsid is not available on this machine. So not using it."); - setsidSupported = false; - } finally { // handle the exit code - LOG.info("setsid exited with exit code " + shexec.getExitCode()); - } - return setsidSupported; - } - public static class DelayedProcessKiller extends Thread { private final String user; private final String pid; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index 53c5659..e42d74d 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -50,6 +50,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.util.ConverterUtils; +import com.google.common.annotations.VisibleForTesting; + public class DefaultContainerExecutor extends ContainerExecutor { private static final Log LOG = LogFactory @@ -237,8 +239,9 @@ public void writeLocalWrapperScript(Path launchDst, Path pidFile) throws IOExcep protected abstract void writeLocalWrapperScript(Path launchDst, Path pidFile, PrintStream pout); - protected LocalWrapperScriptBuilder(Path wrapperScriptPath) { - this.wrapperScriptPath = wrapperScriptPath; + protected LocalWrapperScriptBuilder(Path containerWorkDir) { + this.wrapperScriptPath = new Path(containerWorkDir, + Shell.appendScriptExtension("default_container_executor")); } } @@ -246,7 +249,7 @@ protected LocalWrapperScriptBuilder(Path wrapperScriptPath) { extends LocalWrapperScriptBuilder { public UnixLocalWrapperScriptBuilder(Path containerWorkDir) { - super(new Path(containerWorkDir, "default_container_executor.sh")); + super(containerWorkDir); } @Override @@ -260,7 +263,7 @@ public void writeLocalWrapperScript(Path launchDst, Path pidFile, pout.println(); pout.println("echo $$ > " + pidFile.toString() + ".tmp"); pout.println("/bin/mv -f " + pidFile.toString() + ".tmp " + pidFile); - String exec = ContainerExecutor.isSetsidAvailable? "exec setsid" : "exec"; + String exec = Shell.isSetsidAvailable? "exec setsid" : "exec"; pout.println(exec + " /bin/bash -c \"" + launchDst.toUri().getPath().toString() + "\""); } @@ -274,7 +277,7 @@ public void writeLocalWrapperScript(Path launchDst, Path pidFile, public WindowsLocalWrapperScriptBuilder(String containerIdStr, Path containerWorkDir) { - super(new Path(containerWorkDir, "default_container_executor.cmd")); + super(containerWorkDir); this.containerIdStr = containerIdStr; } @@ -297,18 +300,15 @@ public void writeLocalWrapperScript(Path launchDst, Path pidFile, @Override public boolean signalContainer(String user, String pid, Signal signal) throws IOException { - final String sigpid = ContainerExecutor.isSetsidAvailable - ? "-" + pid - : pid; - LOG.debug("Sending signal " + signal.getValue() + " to pid " + sigpid + LOG.debug("Sending signal " + signal.getValue() + " to pid " + pid + " as user " + user); - if (!containerIsAlive(sigpid)) { + if (!containerIsAlive(pid)) { return false; } try { - killContainer(sigpid, signal); + killContainer(pid, signal); } catch (IOException e) { - if (!containerIsAlive(sigpid)) { + if (!containerIsAlive(pid)) { return false; } throw e; @@ -322,9 +322,11 @@ public boolean signalContainer(String user, String pid, Signal signal) * @param pid String pid * @return boolean true if the process is alive */ - private boolean containerIsAlive(String pid) throws IOException { + @VisibleForTesting + public static boolean containerIsAlive(String pid) throws IOException { try { - new ShellCommandExecutor(getCheckProcessIsAliveCommand(pid)).execute(); + new ShellCommandExecutor(Shell.getCheckProcessIsAliveCommand(pid)) + .execute(); // successful execution means process is alive return true; } @@ -342,7 +344,7 @@ private boolean containerIsAlive(String pid) throws IOException { * (for logging). */ private void killContainer(String pid, Signal signal) throws IOException { - new ShellCommandExecutor(getSignalKillCommand(signal.getValue(), pid)) + new ShellCommandExecutor(Shell.getSignalKillCommand(signal.getValue(), pid)) .execute(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 71809b2..f38ee59 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -71,8 +71,8 @@ private static final Log LOG = LogFactory.getLog(ContainerLaunch.class); - public static final String CONTAINER_SCRIPT = Shell.WINDOWS ? - "launch_container.cmd" : "launch_container.sh"; + public static final String CONTAINER_SCRIPT = + Shell.appendScriptExtension("launch_container"); public static final String FINAL_CONTAINER_TOKENS_FILE = "container_tokens"; private static final String PID_FILE_NAME_FMT = "%s.pid"; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index f422617..1d4b0be 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -19,12 +19,13 @@ package org.apache.hadoop.yarn.server.nodemanager; import java.io.BufferedReader; -import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -34,6 +35,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -51,6 +53,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; @@ -71,6 +74,7 @@ .getRecordFactory(null); static final String user = "nobody"; private FileContext localFS; + private ContainerId cId; @Before public void setup() throws UnsupportedFileSystemException { @@ -79,6 +83,9 @@ public void setup() throws UnsupportedFileSystemException { logsDir.mkdirs(); remoteLogsDir.mkdirs(); nmLocalDir.mkdirs(); + + // Construct the Container-id + cId = createContainerId(); } @After @@ -98,8 +105,6 @@ public void testKillContainersOnShutdown() throws IOException { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - // Construct the Container-id - ContainerId cId = createContainerId(); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -120,9 +125,7 @@ public void testKillContainersOnShutdown() throws IOException { localResources.put(destinationFile, localResource); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + List commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile)); containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); @@ -149,25 +152,32 @@ public void testKillContainersOnShutdown() throws IOException { nm.stop(); - // Now verify the contents of the file - // Script generates a message when it receives a sigterm - // so we look for that - BufferedReader reader = - new BufferedReader(new FileReader(processStartFile)); + // Now verify the contents of the file. Script generates a message when it + // receives a sigterm so we look for that. We cannot perform this check on + // Windows, because the process is not notified when killed by winutils. + // There is no way for the process to trap and respond. Instead, we can + // verify that the job object with ID matching container ID no longer exists. + if (Shell.WINDOWS) { + Assert.assertFalse("Process is still alive!", + DefaultContainerExecutor.containerIsAlive(cId.toString())); + } else { + BufferedReader reader = + new BufferedReader(new FileReader(processStartFile)); - boolean foundSigTermMessage = false; - while (true) { - String line = reader.readLine(); - if (line == null) { - break; - } - if (line.contains("SIGTERM")) { - foundSigTermMessage = true; - break; + boolean foundSigTermMessage = false; + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + if (line.contains("SIGTERM")) { + foundSigTermMessage = true; + break; + } } + Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); + reader.close(); } - Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); - reader.close(); } private ContainerId createContainerId() { @@ -200,16 +210,24 @@ private YarnConfiguration createNMConfig() { * stopped by external means. */ private File createUnhaltingScriptFile() throws IOException { - File scriptFile = new File(tmpDir, "scriptFile.sh"); - BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile)); - fileWriter.write("#!/bin/bash\n\n"); - fileWriter.write("echo \"Running testscript for delayed kill\"\n"); - fileWriter.write("hello=\"Got SIGTERM\"\n"); - fileWriter.write("umask 0\n"); - fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n"); - fileWriter.write("echo \"Writing pid to start file\"\n"); - fileWriter.write("echo $$ >> " + processStartFile + "\n"); - fileWriter.write("while true; do\ndate >> /dev/null;\n done\n"); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + if (Shell.WINDOWS) { + fileWriter.println("@echo \"Running testscript for delayed kill\""); + fileWriter.println("@echo \"Writing pid to start file\""); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@pause"); + } else { + fileWriter.write("#!/bin/bash\n\n"); + fileWriter.write("echo \"Running testscript for delayed kill\"\n"); + fileWriter.write("hello=\"Got SIGTERM\"\n"); + fileWriter.write("umask 0\n"); + fileWriter.write("trap \"echo $hello >> " + processStartFile + + "\" SIGTERM\n"); + fileWriter.write("echo \"Writing pid to start file\"\n"); + fileWriter.write("echo $$ >> " + processStartFile + "\n"); + fileWriter.write("while true; do\ndate >> /dev/null;\n done\n"); + } fileWriter.close(); return scriptFile; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 8300d8f..55e92a4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -76,15 +76,15 @@ public BaseContainerManagerTest() throws UnsupportedFileSystemException { localFS = FileContext.getLocalFSFileContext(); localDir = - new File("target", this.getClass().getName() + "-localDir") + new File("target", this.getClass().getSimpleName() + "-localDir") .getAbsoluteFile(); localLogDir = - new File("target", this.getClass().getName() + "-localLogDir") + new File("target", this.getClass().getSimpleName() + "-localLogDir") .getAbsoluteFile(); remoteLogDir = - new File("target", this.getClass().getName() + "-remoteLogDir") + new File("target", this.getClass().getSimpleName() + "-remoteLogDir") .getAbsoluteFile(); - tmpDir = new File("target", this.getClass().getName() + "-tmpDir"); + tmpDir = new File("target", this.getClass().getSimpleName() + "-tmpDir"); } protected static Log LOG = LogFactory diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 5b01cc0..4eb0e33 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; @@ -53,6 +54,7 @@ import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; @@ -193,21 +195,29 @@ public void testContainerLaunchAndStop() throws IOException, InterruptedException { containerManager.start(); - File scriptFile = new File(tmpDir, "scriptFile.sh"); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); PrintWriter fileWriter = new PrintWriter(scriptFile); File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho Hello World! > " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - fileWriter.write("\nexec sleep 100"); + + // ////// Construct the Container-id + ContainerId cId = createContainerId(); + + if (Shell.WINDOWS) { + fileWriter.println("@echo Hello World!> " + processStartFile); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho Hello World! > " + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + fileWriter.write("\nexec sleep 100"); + } fileWriter.close(); ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id - ContainerId cId = createContainerId(); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -228,13 +238,11 @@ public void testContainerLaunchAndStop() throws IOException, localResources.put(destinationFile, rsrc_alpha); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + List commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile)); containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + containerLaunchContext.getResource().setMemory(100); // MB StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); containerManager.startContainer(startRequest); @@ -260,12 +268,10 @@ public void testContainerLaunchAndStop() throws IOException, // Assert that the process is alive Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); // Once more Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); stopRequest.setContainerId(cId); @@ -279,28 +285,39 @@ public void testContainerLaunchAndStop() throws IOException, gcsRequest.setContainerId(cId); ContainerStatus containerStatus = containerManager.getContainerStatus(gcsRequest).getStatus(); - Assert.assertEquals(ExitCode.TERMINATED.getExitCode(), - containerStatus.getExitStatus()); + int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(); + Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Assert that the process is not alive anymore Assert.assertFalse("Process is still alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); } private void testContainerLaunchAndExit(int exitCode) throws IOException, InterruptedException { - File scriptFile = new File(tmpDir, "scriptFile.sh"); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); PrintWriter fileWriter = new PrintWriter(scriptFile); File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho Hello World! > " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - // Have script throw an exit code at the end - if (exitCode != 0) { - fileWriter.write("\nexit "+exitCode); + // ////// Construct the Container-id + ContainerId cId = createContainerId(); + + if (Shell.WINDOWS) { + fileWriter.println("@echo Hello World!> " + processStartFile); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + if (exitCode != 0) { + fileWriter.println("@exit " + exitCode); + } + } else { + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho Hello World! > " + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + // Have script throw an exit code at the end + if (exitCode != 0) { + fileWriter.write("\nexit "+exitCode); + } } fileWriter.close(); @@ -308,8 +325,6 @@ private void testContainerLaunchAndExit(int exitCode) throws IOException, Interr ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id - ContainerId cId = createContainerId(); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -330,13 +345,11 @@ private void testContainerLaunchAndExit(int exitCode) throws IOException, Interr localResources.put(destinationFile, rsrc_alpha); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + List commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile)); containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + containerLaunchContext.getResource().setMemory(100); // MB StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 822835d..bcd94e2 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -84,13 +85,15 @@ public void testSpecialCharSymlinks() throws IOException { File shellFile = null; File tempFile = null; - String badSymlink = "foo@zz%_#*&!-+= bar()"; + String badSymlink = Shell.WINDOWS ? "foo@zz_#!-+bar.cmd" : + "foo@zz%_#*&!-+= bar()"; File symLinkFile = null; try { - shellFile = new File(tmpDir, "hello.sh"); - tempFile = new File(tmpDir, "temp.sh"); - String timeoutCommand = "echo \"hello\""; + shellFile = Shell.appendScriptExtension(tmpDir, "hello"); + tempFile = Shell.appendScriptExtension(tmpDir, "temp"); + String timeoutCommand = Shell.WINDOWS ? "@echo \"hello\"" : + "echo \"hello\""; PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile)); shellFile.setExecutable(true); writer.println(timeoutCommand); @@ -105,7 +108,13 @@ public void testSpecialCharSymlinks() throws IOException { Map env = new HashMap(); List commands = new ArrayList(); - commands.add("/bin/sh ./\\\"" + badSymlink + "\\\""); + if (Shell.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add("\"" + badSymlink + "\""); + } else { + commands.add("/bin/sh ./\\\"" + badSymlink + "\\\""); + } ContainerLaunch.writeLaunchEnv(fos, env, resources, commands); fos.flush(); @@ -141,16 +150,30 @@ public void testSpecialCharSymlinks() throws IOException { // this is a dirty hack - but should be ok for a unittest. @SuppressWarnings({ "rawtypes", "unchecked" }) public static void setNewEnvironmentHack(Map newenv) throws Exception { - Class[] classes = Collections.class.getDeclaredClasses(); - Map env = System.getenv(); - for (Class cl : classes) { - if ("java.util.Collections$UnmodifiableMap".equals(cl.getName())) { - Field field = cl.getDeclaredField("m"); - field.setAccessible(true); - Object obj = field.get(env); - Map map = (Map) obj; - map.clear(); - map.putAll(newenv); + try { + Class cl = Class.forName("java.lang.ProcessEnvironment"); + Field field = cl.getDeclaredField("theEnvironment"); + field.setAccessible(true); + Map env = (Map)field.get(null); + env.clear(); + env.putAll(newenv); + Field ciField = cl.getDeclaredField("theCaseInsensitiveEnvironment"); + ciField.setAccessible(true); + Map cienv = (Map)ciField.get(null); + cienv.clear(); + cienv.putAll(newenv); + } catch (NoSuchFieldException e) { + Class[] classes = Collections.class.getDeclaredClasses(); + Map env = System.getenv(); + for (Class cl : classes) { + if ("java.util.Collections$UnmodifiableMap".equals(cl.getName())) { + Field field = cl.getDeclaredField("m"); + field.setAccessible(true); + Object obj = field.get(env); + Map map = (Map) obj; + map.clear(); + map.putAll(newenv); + } } } } @@ -168,22 +191,6 @@ public void testContainerEnvVariables() throws Exception { envWithDummy.put(Environment.MALLOC_ARENA_MAX.name(), "99"); setNewEnvironmentHack(envWithDummy); - String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name()); - File scriptFile = new File(tmpDir, "scriptFile.sh"); - PrintWriter fileWriter = new PrintWriter(scriptFile); - File processStartFile = - new File(tmpDir, "env_vars.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho $" + Environment.MALLOC_ARENA_MAX.name() + " > " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - fileWriter.write("\nexec sleep 100"); - fileWriter.close(); - - assert(malloc != null && !"".equals(malloc)); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(0); @@ -195,6 +202,30 @@ public void testContainerEnvVariables() throws Exception { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); + + String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name()); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + File processStartFile = + new File(tmpDir, "env_vars.txt").getAbsoluteFile(); + if (Shell.WINDOWS) { + fileWriter.println("@echo " + Environment.MALLOC_ARENA_MAX.$() + "> " + + processStartFile); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho " + Environment.MALLOC_ARENA_MAX.$() + " > " + + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); + + assert(malloc != null && !"".equals(malloc)); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -218,9 +249,7 @@ public void testContainerEnvVariables() throws Exception { // set up the rest of the container containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + List commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile)); containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); @@ -250,12 +279,10 @@ public void testContainerEnvVariables() throws Exception { // Assert that the process is alive Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); // Once more Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); stopRequest.setContainerId(cId); @@ -269,39 +296,19 @@ public void testContainerEnvVariables() throws Exception { gcsRequest.setContainerId(cId); ContainerStatus containerStatus = containerManager.getContainerStatus(gcsRequest).getStatus(); - Assert.assertEquals(ExitCode.TERMINATED.getExitCode(), - containerStatus.getExitStatus()); + int expectedExitCode = Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(); + Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Assert that the process is not alive anymore Assert.assertFalse("Process is still alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); } @Test public void testDelayedKill() throws Exception { containerManager.start(); - File processStartFile = - new File(tmpDir, "pid.txt").getAbsoluteFile(); - - // setup a script that can handle sigterm gracefully - File scriptFile = new File(tmpDir, "testscript.sh"); - PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile)); - writer.println("#!/bin/bash\n\n"); - writer.println("echo \"Running testscript for delayed kill\""); - writer.println("hello=\"Got SIGTERM\""); - writer.println("umask 0"); - writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM"); - writer.println("echo \"Writing pid to start file\""); - writer.println("echo $$ >> " + processStartFile); - writer.println("while true; do\nsleep 1s;\ndone"); - writer.close(); - scriptFile.setExecutable(true); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(1); @@ -313,6 +320,33 @@ public void testDelayedKill() throws Exception { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); + + File processStartFile = + new File(tmpDir, "pid.txt").getAbsoluteFile(); + + // setup a script that can handle sigterm gracefully + File scriptFile = Shell.appendScriptExtension(tmpDir, "testscript"); + PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile)); + if (Shell.WINDOWS) { + writer.println("@echo \"Running testscript for delayed kill\""); + writer.println("@echo \"Writing pid to start file\""); + writer.println("@echo " + cId + "> " + processStartFile); + writer.println("@ping -n 100 127.0.0.1 >nul"); + } else { + writer.println("#!/bin/bash\n\n"); + writer.println("echo \"Running testscript for delayed kill\""); + writer.println("hello=\"Got SIGTERM\""); + writer.println("umask 0"); + writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM"); + writer.println("echo \"Writing pid to start file\""); + writer.println("echo $$ >> " + processStartFile); + writer.println("while true; do\nsleep 1s;\ndone"); + } + writer.close(); + scriptFile.setExecutable(true); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -336,8 +370,7 @@ public void testDelayedKill() throws Exception { // set up the rest of the container containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add(scriptFile.getAbsolutePath()); + List commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile)); containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); @@ -371,25 +404,32 @@ public void testDelayedKill() throws Exception { Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), containerStatus.getExitStatus()); - // Now verify the contents of the file - // Script generates a message when it receives a sigterm - // so we look for that - BufferedReader reader = - new BufferedReader(new FileReader(processStartFile)); - - boolean foundSigTermMessage = false; - while (true) { - String line = reader.readLine(); - if (line == null) { - break; - } - if (line.contains("SIGTERM")) { - foundSigTermMessage = true; - break; + // Now verify the contents of the file. Script generates a message when it + // receives a sigterm so we look for that. We cannot perform this check on + // Windows, because the process is not notified when killed by winutils. + // There is no way for the process to trap and respond. Instead, we can + // verify that the job object with ID matching container ID no longer exists. + if (Shell.WINDOWS) { + Assert.assertFalse("Process is still alive!", + DefaultContainerExecutor.containerIsAlive(cId.toString())); + } else { + BufferedReader reader = + new BufferedReader(new FileReader(processStartFile)); + + boolean foundSigTermMessage = false; + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + if (line.contains("SIGTERM")) { + foundSigTermMessage = true; + break; + } } + Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); + reader.close(); } - Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); - reader.close(); } }