diff --git hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java index eacc0bf..d1b7d2c 100644 --- hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java +++ hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java @@ -123,6 +123,19 @@ public static boolean isJava7OrAbove() { : new String[] { "ln", "-s", target, link }; } + /** Return a command for determining if process with specified pid is alive. */ + public static String[] getCheckProcessIsAliveCommand(String pid) { + return Shell.WINDOWS ? + new String[] { Shell.WINUTILS, "task", "isAlive", pid } : + new String[] { "kill", "-0", isSetsidAvailable ? "-" + pid : pid }; + } + + /** Return a command to send a signal to a given pid */ + public static String[] getSignalKillCommand(int code, String pid) { + return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } : + new String[] { "kill", "-" + code, isSetsidAvailable ? "-" + pid : pid }; + } + /** a Unix command to set permission */ public static final String SET_PERMISSION_COMMAND = "chmod"; /** a Unix command to set owner */ @@ -243,6 +256,26 @@ public static final String getWinUtilsPath() { return winUtilsPath; } + public static final boolean isSetsidAvailable = isSetsidSupported(); + private static boolean isSetsidSupported() { + if (Shell.WINDOWS) { + return false; + } + ShellCommandExecutor shexec = null; + boolean setsidSupported = true; + try { + String[] args = {"setsid", "bash", "-c", "echo $$"}; + shexec = new ShellCommandExecutor(args); + shexec.execute(); + } catch (IOException ioe) { + LOG.warn("setsid is not available on this machine. So not using it."); + setsidSupported = false; + } finally { // handle the exit code + LOG.info("setsid exited with exit code " + shexec.getExitCode()); + } + return setsidSupported; + } + /** Token separator regex used to parse Shell tool outputs */ public static final String TOKEN_SEPARATOR_REGEX = WINDOWS ? "[|\n\r]" : "[ \t\n\r\f]"; diff --git hadoop-common-project/hadoop-common/src/main/winutils/task.c hadoop-common-project/hadoop-common/src/main/winutils/task.c index 5a5345b..b8267ca 100644 --- hadoop-common-project/hadoop-common/src/main/winutils/task.c +++ hadoop-common-project/hadoop-common/src/main/winutils/task.c @@ -24,6 +24,10 @@ #define ERROR_TASK_NOT_ALIVE 1 +// This exit code for killed processes is compatible with Unix, where a killed +// process exits with 128 + signal. For SIGKILL, this would be 128 + 9 = 137. +#define KILLED_PROCESS_EXIT_CODE 137 + // List of different task related command line options supported by // winutils. typedef enum TaskCommandOptionType @@ -264,7 +268,7 @@ DWORD killTask(_TCHAR* jobObjName) return err; } - if(TerminateJobObject(jobObject, 1) == 0) + if(TerminateJobObject(jobObject, KILLED_PROCESS_EXIT_CODE) == 0) { return GetLastError(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java index c386101..327a738 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerExecutor.java @@ -222,19 +222,6 @@ protected Path getPidFilePath(ContainerId containerId) { } - /** Return a command for determining if process with specified pid is alive. */ - protected static String[] getCheckProcessIsAliveCommand(String pid) { - return Shell.WINDOWS ? - new String[] { Shell.WINUTILS, "task", "isAlive", pid } : - new String[] { "kill", "-0", pid }; - } - - /** Return a command to send a signal to a given pid */ - protected static String[] getSignalKillCommand(int code, String pid) { - return Shell.WINDOWS ? new String[] { Shell.WINUTILS, "task", "kill", pid } : - new String[] { "kill", "-" + code, pid }; - } - /** * Is the container still active? * @param containerId @@ -303,26 +290,6 @@ public String getProcessId(ContainerId containerID) { return pid; } - public static final boolean isSetsidAvailable = isSetsidSupported(); - private static boolean isSetsidSupported() { - if (Shell.WINDOWS) { - return true; - } - ShellCommandExecutor shexec = null; - boolean setsidSupported = true; - try { - String[] args = {"setsid", "bash", "-c", "echo $$"}; - shexec = new ShellCommandExecutor(args); - shexec.execute(); - } catch (IOException ioe) { - LOG.warn("setsid is not available on this machine. So not using it."); - setsidSupported = false; - } finally { // handle the exit code - LOG.info("setsid exited with exit code " + shexec.getExitCode()); - } - return setsidSupported; - } - public static class DelayedProcessKiller extends Thread { private final String user; private final String pid; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index 53c5659..82ce9a6 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -50,6 +50,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.util.ConverterUtils; +import com.google.common.annotations.VisibleForTesting; + public class DefaultContainerExecutor extends ContainerExecutor { private static final Log LOG = LogFactory @@ -260,7 +262,7 @@ public void writeLocalWrapperScript(Path launchDst, Path pidFile, pout.println(); pout.println("echo $$ > " + pidFile.toString() + ".tmp"); pout.println("/bin/mv -f " + pidFile.toString() + ".tmp " + pidFile); - String exec = ContainerExecutor.isSetsidAvailable? "exec setsid" : "exec"; + String exec = Shell.isSetsidAvailable? "exec setsid" : "exec"; pout.println(exec + " /bin/bash -c \"" + launchDst.toUri().getPath().toString() + "\""); } @@ -297,18 +299,15 @@ public void writeLocalWrapperScript(Path launchDst, Path pidFile, @Override public boolean signalContainer(String user, String pid, Signal signal) throws IOException { - final String sigpid = ContainerExecutor.isSetsidAvailable - ? "-" + pid - : pid; - LOG.debug("Sending signal " + signal.getValue() + " to pid " + sigpid + LOG.debug("Sending signal " + signal.getValue() + " to pid " + pid + " as user " + user); - if (!containerIsAlive(sigpid)) { + if (!containerIsAlive(pid)) { return false; } try { - killContainer(sigpid, signal); + killContainer(pid, signal); } catch (IOException e) { - if (!containerIsAlive(sigpid)) { + if (!containerIsAlive(pid)) { return false; } throw e; @@ -322,9 +321,11 @@ public boolean signalContainer(String user, String pid, Signal signal) * @param pid String pid * @return boolean true if the process is alive */ - private boolean containerIsAlive(String pid) throws IOException { + @VisibleForTesting + public static boolean containerIsAlive(String pid) throws IOException { try { - new ShellCommandExecutor(getCheckProcessIsAliveCommand(pid)).execute(); + new ShellCommandExecutor(Shell.getCheckProcessIsAliveCommand(pid)) + .execute(); // successful execution means process is alive return true; } @@ -342,7 +343,7 @@ private boolean containerIsAlive(String pid) throws IOException { * (for logging). */ private void killContainer(String pid, Signal signal) throws IOException { - new ShellCommandExecutor(getSignalKillCommand(signal.getValue(), pid)) + new ShellCommandExecutor(Shell.getSignalKillCommand(signal.getValue(), pid)) .execute(); } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index f422617..a77536f 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -19,11 +19,11 @@ package org.apache.hadoop.yarn.server.nodemanager; import java.io.BufferedReader; -import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; @@ -71,6 +72,7 @@ .getRecordFactory(null); static final String user = "nobody"; private FileContext localFS; + private ContainerId cId; @Before public void setup() throws UnsupportedFileSystemException { @@ -79,6 +81,9 @@ public void setup() throws UnsupportedFileSystemException { logsDir.mkdirs(); remoteLogsDir.mkdirs(); nmLocalDir.mkdirs(); + + // Construct the Container-id + cId = createContainerId(); } @After @@ -98,8 +103,6 @@ public void testKillContainersOnShutdown() throws IOException { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - // Construct the Container-id - ContainerId cId = createContainerId(); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -121,8 +124,14 @@ public void testKillContainersOnShutdown() throws IOException { containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + if (Path.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add(scriptFile.getAbsolutePath()); + } else { + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + } containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); @@ -149,25 +158,32 @@ public void testKillContainersOnShutdown() throws IOException { nm.stop(); - // Now verify the contents of the file - // Script generates a message when it receives a sigterm - // so we look for that - BufferedReader reader = - new BufferedReader(new FileReader(processStartFile)); + // Now verify the contents of the file. Script generates a message when it + // receives a sigterm so we look for that. We cannot perform this check on + // Windows, because the process is not notified when killed by winutils. + // There is no way for the process to trap and respond. Instead, we can + // verify that the job object with ID matching container ID no longer exists. + if (Path.WINDOWS) { + Assert.assertFalse("Process is still alive!", + DefaultContainerExecutor.containerIsAlive(cId.toString())); + } else { + BufferedReader reader = + new BufferedReader(new FileReader(processStartFile)); - boolean foundSigTermMessage = false; - while (true) { - String line = reader.readLine(); - if (line == null) { - break; - } - if (line.contains("SIGTERM")) { - foundSigTermMessage = true; - break; + boolean foundSigTermMessage = false; + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + if (line.contains("SIGTERM")) { + foundSigTermMessage = true; + break; + } } + Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); + reader.close(); } - Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); - reader.close(); } private ContainerId createContainerId() { @@ -200,16 +216,25 @@ private YarnConfiguration createNMConfig() { * stopped by external means. */ private File createUnhaltingScriptFile() throws IOException { - File scriptFile = new File(tmpDir, "scriptFile.sh"); - BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile)); - fileWriter.write("#!/bin/bash\n\n"); - fileWriter.write("echo \"Running testscript for delayed kill\"\n"); - fileWriter.write("hello=\"Got SIGTERM\"\n"); - fileWriter.write("umask 0\n"); - fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n"); - fileWriter.write("echo \"Writing pid to start file\"\n"); - fileWriter.write("echo $$ >> " + processStartFile + "\n"); - fileWriter.write("while true; do\ndate >> /dev/null;\n done\n"); + File scriptFile = new File(tmpDir, Path.WINDOWS ? "scriptFile.cmd" : + "scriptFile.sh"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + if (Path.WINDOWS) { + fileWriter.println("@echo \"Running testscript for delayed kill\""); + fileWriter.println("@echo \"Writing pid to start file\""); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@pause"); + } else { + fileWriter.write("#!/bin/bash\n\n"); + fileWriter.write("echo \"Running testscript for delayed kill\"\n"); + fileWriter.write("hello=\"Got SIGTERM\"\n"); + fileWriter.write("umask 0\n"); + fileWriter.write("trap \"echo $hello >> " + processStartFile + + "\" SIGTERM\n"); + fileWriter.write("echo \"Writing pid to start file\"\n"); + fileWriter.write("echo $$ >> " + processStartFile + "\n"); + fileWriter.write("while true; do\ndate >> /dev/null;\n done\n"); + } fileWriter.close(); return scriptFile; diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 8300d8f..55e92a4 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -76,15 +76,15 @@ public BaseContainerManagerTest() throws UnsupportedFileSystemException { localFS = FileContext.getLocalFSFileContext(); localDir = - new File("target", this.getClass().getName() + "-localDir") + new File("target", this.getClass().getSimpleName() + "-localDir") .getAbsoluteFile(); localLogDir = - new File("target", this.getClass().getName() + "-localLogDir") + new File("target", this.getClass().getSimpleName() + "-localLogDir") .getAbsoluteFile(); remoteLogDir = - new File("target", this.getClass().getName() + "-remoteLogDir") + new File("target", this.getClass().getSimpleName() + "-remoteLogDir") .getAbsoluteFile(); - tmpDir = new File("target", this.getClass().getName() + "-tmpDir"); + tmpDir = new File("target", this.getClass().getSimpleName() + "-tmpDir"); } protected static Log LOG = LogFactory diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 5b01cc0..8be7ab9 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; @@ -193,21 +194,30 @@ public void testContainerLaunchAndStop() throws IOException, InterruptedException { containerManager.start(); - File scriptFile = new File(tmpDir, "scriptFile.sh"); + File scriptFile = new File(tmpDir, Path.WINDOWS ? "scriptFile.cmd" : + "scriptFile.sh"); PrintWriter fileWriter = new PrintWriter(scriptFile); File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho Hello World! > " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - fileWriter.write("\nexec sleep 100"); + + // ////// Construct the Container-id + ContainerId cId = createContainerId(); + + if (Path.WINDOWS) { + fileWriter.println("@echo Hello World!> " + processStartFile); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho Hello World! > " + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + fileWriter.write("\nexec sleep 100"); + } fileWriter.close(); ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id - ContainerId cId = createContainerId(); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -229,12 +239,18 @@ public void testContainerLaunchAndStop() throws IOException, containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + if (Path.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add(scriptFile.getAbsolutePath()); + } else { + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + } containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + containerLaunchContext.getResource().setMemory(100); // MB StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); containerManager.startContainer(startRequest); @@ -260,12 +276,10 @@ public void testContainerLaunchAndStop() throws IOException, // Assert that the process is alive Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); // Once more Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); stopRequest.setContainerId(cId); @@ -279,28 +293,40 @@ public void testContainerLaunchAndStop() throws IOException, gcsRequest.setContainerId(cId); ContainerStatus containerStatus = containerManager.getContainerStatus(gcsRequest).getStatus(); - Assert.assertEquals(ExitCode.TERMINATED.getExitCode(), - containerStatus.getExitStatus()); + int expectedExitCode = Path.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(); + Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Assert that the process is not alive anymore Assert.assertFalse("Process is still alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); } private void testContainerLaunchAndExit(int exitCode) throws IOException, InterruptedException { - File scriptFile = new File(tmpDir, "scriptFile.sh"); + File scriptFile = new File(tmpDir, Path.WINDOWS ? "scriptFile.cmd" : + "scriptFile.sh"); PrintWriter fileWriter = new PrintWriter(scriptFile); File processStartFile = new File(tmpDir, "start_file.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho Hello World! > " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - // Have script throw an exit code at the end - if (exitCode != 0) { - fileWriter.write("\nexit "+exitCode); + // ////// Construct the Container-id + ContainerId cId = createContainerId(); + + if (Path.WINDOWS) { + fileWriter.println("@echo Hello World!> " + processStartFile); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + if (exitCode != 0) { + fileWriter.println("@exit " + exitCode); + } + } else { + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho Hello World! > " + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + // Have script throw an exit code at the end + if (exitCode != 0) { + fileWriter.write("\nexit "+exitCode); + } } fileWriter.close(); @@ -308,8 +334,6 @@ private void testContainerLaunchAndExit(int exitCode) throws IOException, Interr ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id - ContainerId cId = createContainerId(); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -331,12 +355,18 @@ private void testContainerLaunchAndExit(int exitCode) throws IOException, Interr containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + if (Path.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add(scriptFile.getAbsolutePath()); + } else { + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + } containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + containerLaunchContext.getResource().setMemory(100); // MB StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 822835d..82e0d0e 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; +import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -84,13 +85,15 @@ public void testSpecialCharSymlinks() throws IOException { File shellFile = null; File tempFile = null; - String badSymlink = "foo@zz%_#*&!-+= bar()"; + String badSymlink = Shell.WINDOWS ? "foo@zz_#!-+bar.cmd" : + "foo@zz%_#*&!-+= bar()"; File symLinkFile = null; try { - shellFile = new File(tmpDir, "hello.sh"); - tempFile = new File(tmpDir, "temp.sh"); - String timeoutCommand = "echo \"hello\""; + shellFile = new File(tmpDir, Shell.WINDOWS ? "hello.cmd" : "hello.sh"); + tempFile = new File(tmpDir, Shell.WINDOWS ? "temp.cmd" : "temp.sh"); + String timeoutCommand = Shell.WINDOWS ? "@echo \"hello\"" : + "echo \"hello\""; PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile)); shellFile.setExecutable(true); writer.println(timeoutCommand); @@ -105,7 +108,13 @@ public void testSpecialCharSymlinks() throws IOException { Map env = new HashMap(); List commands = new ArrayList(); - commands.add("/bin/sh ./\\\"" + badSymlink + "\\\""); + if (Shell.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add("\"" + badSymlink + "\""); + } else { + commands.add("/bin/sh ./\\\"" + badSymlink + "\\\""); + } ContainerLaunch.writeLaunchEnv(fos, env, resources, commands); fos.flush(); @@ -141,16 +150,30 @@ public void testSpecialCharSymlinks() throws IOException { // this is a dirty hack - but should be ok for a unittest. @SuppressWarnings({ "rawtypes", "unchecked" }) public static void setNewEnvironmentHack(Map newenv) throws Exception { - Class[] classes = Collections.class.getDeclaredClasses(); - Map env = System.getenv(); - for (Class cl : classes) { - if ("java.util.Collections$UnmodifiableMap".equals(cl.getName())) { - Field field = cl.getDeclaredField("m"); - field.setAccessible(true); - Object obj = field.get(env); - Map map = (Map) obj; - map.clear(); - map.putAll(newenv); + try { + Class cl = Class.forName("java.lang.ProcessEnvironment"); + Field field = cl.getDeclaredField("theEnvironment"); + field.setAccessible(true); + Map env = (Map)field.get(null); + env.clear(); + env.putAll(newenv); + Field ciField = cl.getDeclaredField("theCaseInsensitiveEnvironment"); + ciField.setAccessible(true); + Map cienv = (Map)ciField.get(null); + cienv.clear(); + cienv.putAll(newenv); + } catch (NoSuchFieldException e) { + Class[] classes = Collections.class.getDeclaredClasses(); + Map env = System.getenv(); + for (Class cl : classes) { + if ("java.util.Collections$UnmodifiableMap".equals(cl.getName())) { + Field field = cl.getDeclaredField("m"); + field.setAccessible(true); + Object obj = field.get(env); + Map map = (Map) obj; + map.clear(); + map.putAll(newenv); + } } } } @@ -168,22 +191,6 @@ public void testContainerEnvVariables() throws Exception { envWithDummy.put(Environment.MALLOC_ARENA_MAX.name(), "99"); setNewEnvironmentHack(envWithDummy); - String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name()); - File scriptFile = new File(tmpDir, "scriptFile.sh"); - PrintWriter fileWriter = new PrintWriter(scriptFile); - File processStartFile = - new File(tmpDir, "env_vars.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho $" + Environment.MALLOC_ARENA_MAX.name() + " > " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - fileWriter.write("\nexec sleep 100"); - fileWriter.close(); - - assert(malloc != null && !"".equals(malloc)); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(0); @@ -195,6 +202,31 @@ public void testContainerEnvVariables() throws Exception { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); + + String malloc = System.getenv(Environment.MALLOC_ARENA_MAX.name()); + File scriptFile = new File(tmpDir, Shell.WINDOWS ? "scriptFile.cmd" : + "scriptFile.sh"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + File processStartFile = + new File(tmpDir, "env_vars.txt").getAbsoluteFile(); + if (Shell.WINDOWS) { + fileWriter.println("@echo " + Environment.MALLOC_ARENA_MAX.$() + "> " + + processStartFile); + fileWriter.println("@echo " + cId + ">> " + processStartFile); + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho " + Environment.MALLOC_ARENA_MAX.$() + " > " + + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); + + assert(malloc != null && !"".equals(malloc)); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -219,8 +251,14 @@ public void testContainerEnvVariables() throws Exception { // set up the rest of the container containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); + if (Shell.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add(scriptFile.getAbsolutePath()); + } else { + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + } containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); @@ -250,12 +288,10 @@ public void testContainerEnvVariables() throws Exception { // Assert that the process is alive Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); // Once more Assert.assertTrue("Process is not alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); stopRequest.setContainerId(cId); @@ -269,39 +305,19 @@ public void testContainerEnvVariables() throws Exception { gcsRequest.setContainerId(cId); ContainerStatus containerStatus = containerManager.getContainerStatus(gcsRequest).getStatus(); - Assert.assertEquals(ExitCode.TERMINATED.getExitCode(), - containerStatus.getExitStatus()); + int expectedExitCode = Path.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : + ExitCode.TERMINATED.getExitCode(); + Assert.assertEquals(expectedExitCode, containerStatus.getExitStatus()); // Assert that the process is not alive anymore Assert.assertFalse("Process is still alive!", - exec.signalContainer(user, - pid, Signal.NULL)); + DefaultContainerExecutor.containerIsAlive(pid)); } @Test public void testDelayedKill() throws Exception { containerManager.start(); - File processStartFile = - new File(tmpDir, "pid.txt").getAbsoluteFile(); - - // setup a script that can handle sigterm gracefully - File scriptFile = new File(tmpDir, "testscript.sh"); - PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile)); - writer.println("#!/bin/bash\n\n"); - writer.println("echo \"Running testscript for delayed kill\""); - writer.println("hello=\"Got SIGTERM\""); - writer.println("umask 0"); - writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM"); - writer.println("echo \"Writing pid to start file\""); - writer.println("echo $$ >> " + processStartFile); - writer.println("while true; do\nsleep 1s;\ndone"); - writer.close(); - scriptFile.setExecutable(true); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(1); @@ -313,6 +329,34 @@ public void testDelayedKill() throws Exception { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); + + File processStartFile = + new File(tmpDir, "pid.txt").getAbsoluteFile(); + + // setup a script that can handle sigterm gracefully + File scriptFile = new File(tmpDir, Shell.WINDOWS ? "testscript.cmd" : + "testscript.sh"); + PrintWriter writer = new PrintWriter(new FileOutputStream(scriptFile)); + if (Shell.WINDOWS) { + writer.println("@echo \"Running testscript for delayed kill\""); + writer.println("@echo \"Writing pid to start file\""); + writer.println("@echo " + cId + "> " + processStartFile); + writer.println("@ping -n 100 127.0.0.1 >nul"); + } else { + writer.println("#!/bin/bash\n\n"); + writer.println("echo \"Running testscript for delayed kill\""); + writer.println("hello=\"Got SIGTERM\""); + writer.println("umask 0"); + writer.println("trap \"echo $hello >> " + processStartFile + "\" SIGTERM"); + writer.println("echo \"Writing pid to start file\""); + writer.println("echo $$ >> " + processStartFile); + writer.println("while true; do\nsleep 1s;\ndone"); + } + writer.close(); + scriptFile.setExecutable(true); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -337,7 +381,13 @@ public void testDelayedKill() throws Exception { // set up the rest of the container containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); - commands.add(scriptFile.getAbsolutePath()); + if (Shell.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add(scriptFile.getAbsolutePath()); + } else { + commands.add(scriptFile.getAbsolutePath()); + } containerLaunchContext.setCommands(commands); containerLaunchContext.setResource(recordFactory .newRecordInstance(Resource.class)); @@ -371,25 +421,32 @@ public void testDelayedKill() throws Exception { Assert.assertEquals(ExitCode.FORCE_KILLED.getExitCode(), containerStatus.getExitStatus()); - // Now verify the contents of the file - // Script generates a message when it receives a sigterm - // so we look for that - BufferedReader reader = - new BufferedReader(new FileReader(processStartFile)); - - boolean foundSigTermMessage = false; - while (true) { - String line = reader.readLine(); - if (line == null) { - break; - } - if (line.contains("SIGTERM")) { - foundSigTermMessage = true; - break; + // Now verify the contents of the file. Script generates a message when it + // receives a sigterm so we look for that. We cannot perform this check on + // Windows, because the process is not notified when killed by winutils. + // There is no way for the process to trap and respond. Instead, we can + // verify that the job object with ID matching container ID no longer exists. + if (Shell.WINDOWS) { + Assert.assertFalse("Process is still alive!", + DefaultContainerExecutor.containerIsAlive(cId.toString())); + } else { + BufferedReader reader = + new BufferedReader(new FileReader(processStartFile)); + + boolean foundSigTermMessage = false; + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + if (line.contains("SIGTERM")) { + foundSigTermMessage = true; + break; + } } + Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); + reader.close(); } - Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); - reader.close(); } }