diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index 5b534ba..3487145 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -41,6 +41,7 @@ import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -203,11 +204,23 @@ public int launchContainer(Container container, return -1; } int exitCode = shExec.getExitCode(); - LOG.warn("Exit code from task is : " + exitCode); - String message = shExec.getOutput(); - logOutput(message); - container.handle(new ContainerDiagnosticsUpdateEvent(containerId, - message)); + LOG.warn("Exit code from container " + containerId + " is : " + exitCode); + // 143 (SIGTERM) and 137 (SIGKILL) exit codes means the container was + // terminated/killed forcefully. 
In all other cases, log the + // container-executor's output + if (exitCode != ExitCode.FORCE_KILLED.getExitCode() + && exitCode != ExitCode.TERMINATED.getExitCode()) { + LOG.warn("Exception from container-launch with container ID: " + + containerId + " and exit code: " + exitCode , e); + logOutput(shExec.getOutput()); + String diagnostics = "Exception from container-launch: \n" + + StringUtils.stringifyException(e) + "\n" + shExec.getOutput(); + container.handle(new ContainerDiagnosticsUpdateEvent(containerId, + diagnostics)); + } else { + container.handle(new ContainerDiagnosticsUpdateEvent(containerId, + "Container killed on request. Exit code is " + exitCode)); + } return exitCode; } finally { ; // diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 4e6cdcb..8a01bc5 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -146,7 +146,8 @@ public void init() throws IOException { shExec.execute(); } catch (ExitCodeException e) { int exitCode = shExec.getExitCode(); - LOG.warn("Exit code from container is : " + exitCode); + LOG.warn("Exit code from container executor initialization is : " + + exitCode, e); logOutput(shExec.getOutput()); throw new IOException("Linux container executor not configured properly" + " (error=" + exitCode + ")", e); @@ -203,10 +204,11 @@ public void startLocalizer(Path nmPrivateContainerTokensPath, } } catch (ExitCodeException e) { int exitCode = 
shExec.getExitCode(); - LOG.warn("Exit code from container is : " + exitCode); + LOG.warn("Exit code from container " + locId + " startLocalizer is : " + + exitCode, e); logOutput(shExec.getOutput()); - throw new IOException("App initialization failed (" + exitCode + - ") with output: " + shExec.getOutput(), e); + throw new IOException("Application " + appId + " initialization failed (" + + exitCode + ") with output: " + shExec.getOutput(), e); } } @@ -255,19 +257,18 @@ public int launchContainer(Container container, return ExitCode.TERMINATED.getExitCode(); } } catch (ExitCodeException e) { - if (null == shExec) { return -1; } - int exitCode = shExec.getExitCode(); - LOG.warn("Exit code from container is : " + exitCode); + LOG.warn("Exit code from container " + containerId + " is : " + exitCode); // 143 (SIGTERM) and 137 (SIGKILL) exit codes means the container was // terminated/killed forcefully. In all other cases, log the // container-executor's output if (exitCode != ExitCode.FORCE_KILLED.getExitCode() && exitCode != ExitCode.TERMINATED.getExitCode()) { - LOG.warn("Exception from container-launch : ", e); + LOG.warn("Exception from container-launch with container ID: " + + containerId + " and exit code: " + exitCode , e); logOutput(shExec.getOutput()); String diagnostics = "Exception from container-launch: \n" + StringUtils.stringifyException(e) + "\n" + shExec.getOutput(); @@ -309,9 +310,11 @@ public boolean signalContainer(String user, String pid, Signal signal) if (ret_code == ResultCode.INVALID_CONTAINER_PID.getValue()) { return false; } + LOG.warn("Error in signalling container " + pid + " with " + signal + + "; exit = " + ret_code, e); logOutput(shExec.getOutput()); throw new IOException("Problem signalling container " + pid + " with " + - signal + "; exit = " + ret_code); + signal + "; exit = " + ret_code, e); } return true; } @@ -345,13 +348,10 @@ public void deleteAsUser(String user, Path dir, Path... 
baseDirs) { } } catch (IOException e) { int exitCode = shExec.getExitCode(); - LOG.warn("Exit code from container is : " + exitCode); - if (exitCode != 0) { - LOG.error("DeleteAsUser for " + dir.toUri().getPath() - + " returned with non-zero exit code" + exitCode); - LOG.error("Output from LinuxContainerExecutor's deleteAsUser follows:"); - logOutput(shExec.getOutput()); - } + LOG.error("DeleteAsUser for " + dir.toUri().getPath() + + " returned with exit code: " + exitCode, e); + LOG.error("Output from LinuxContainerExecutor's deleteAsUser follows:"); + logOutput(shExec.getOutput()); } } @@ -372,9 +372,10 @@ public void mountCgroups(List cgroupKVs, String hierarchy) shExec.execute(); } catch (IOException e) { int ret_code = shExec.getExitCode(); + LOG.warn("Exception in LinuxContainerExecutor mountCgroups ", e); logOutput(shExec.getOutput()); throw new IOException("Problem mounting cgroups " + cgroupKVs + - "; exit code = " + ret_code, e); + "; exit code = " + ret_code + " and output: " + shExec.getOutput(), e); } } } diff --git hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 231e2fa..e1504ef 100644 --- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import 
org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; @@ -98,7 +99,7 @@ public void testSpecialCharSymlinks() throws IOException { tempFile = Shell.appendScriptExtension(tmpDir, "temp"); String timeoutCommand = Shell.WINDOWS ? "@echo \"hello\"" : "echo \"hello\""; - PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile)); + PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile)); FileUtil.setExecutable(shellFile, true); writer.println(timeoutCommand); writer.close(); @@ -132,7 +133,7 @@ public void testSpecialCharSymlinks() throws IOException { assertEquals(shexc.getExitCode(), 0); assert(shexc.getOutput().contains("hello")); - symLinkFile = new File(tmpDir, badSymlink); + symLinkFile = new File(tmpDir, badSymlink); } finally { // cleanup @@ -151,6 +152,128 @@ public void testSpecialCharSymlinks() throws IOException { } } + // test the diagnostics are generated + @Test (timeout = 20000) + public void testInvalidSymlinkDiagnostics() throws IOException { + + File shellFile = null; + File tempFile = null; + String symLink = Shell.WINDOWS ? "test.cmd" : + "test"; + File symLinkFile = null; + + try { + shellFile = Shell.appendScriptExtension(tmpDir, "hello"); + tempFile = Shell.appendScriptExtension(tmpDir, "temp"); + String timeoutCommand = Shell.WINDOWS ? "@echo \"hello\"" : + "echo \"hello\""; + PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile)); + FileUtil.setExecutable(shellFile, true); + writer.println(timeoutCommand); + writer.close(); + + Map<Path, List<String>> resources = + new HashMap<Path, List<String>>(); + //This is an invalid path and should throw exception because of No such file. 
+ Path invalidPath = new Path(shellFile.getAbsolutePath()+"randomPath"); + resources.put(invalidPath, Arrays.asList(symLink)); + FileOutputStream fos = new FileOutputStream(tempFile); + + Map<String, String> env = new HashMap<String, String>(); + List<String> commands = new ArrayList<String>(); + if (Shell.WINDOWS) { + commands.add("cmd"); + commands.add("/c"); + commands.add("\"" + symLink + "\""); + } else { + commands.add("/bin/sh ./\\\"" + symLink + "\\\""); + } + ContainerLaunch.writeLaunchEnv(fos, env, resources, commands); + fos.flush(); + fos.close(); + FileUtil.setExecutable(tempFile, true); + + Shell.ShellCommandExecutor shexc + = new Shell.ShellCommandExecutor(new String[]{tempFile.getAbsolutePath()}, tmpDir); + boolean catchException = false; + String diagnostics = null; + try { + shexc.execute(); + } catch(ExitCodeException e){ + catchException = true; + diagnostics = e.getMessage(); + } + Assert.assertTrue(catchException); + Assert.assertNotNull(diagnostics); + Assert.assertTrue(shexc.getExitCode() != 0); + symLinkFile = new File(tmpDir, symLink); + } + finally { + // cleanup + if (shellFile != null + && shellFile.exists()) { + shellFile.delete(); + } + if (tempFile != null + && tempFile.exists()) { + tempFile.delete(); + } + if (symLinkFile != null + && symLinkFile.exists()) { + symLinkFile.delete(); + } + } + } + + @Test (timeout = 20000) + public void testInvalidEnvSyntaxDiagnostics() throws IOException { + + File shellFile = null; + try { + shellFile = Shell.appendScriptExtension(tmpDir, "hello"); + String timeoutCommand = Shell.WINDOWS ? 
"@echo \"hello\"" : + "echo \"hello\""; + PrintWriter writer = new PrintWriter(new FileOutputStream(shellFile)); + FileUtil.setExecutable(shellFile, true); + writer.println(timeoutCommand); + writer.close(); + Map<Path, List<String>> resources = + new HashMap<Path, List<String>>(); + FileOutputStream fos = new FileOutputStream(shellFile); + + Map<String, String> env = new HashMap<String, String>(); + env.put( + "APPLICATION_WORKFLOW_CONTEXT", "{\"workflowId\":\"609f91c5cd83\"," + + "\"workflowName\":\"\n\ninsert table " + + "\npartition (cd_education_status)\nselect cd_demo_sk, cd_gender, " ); + List<String> commands = new ArrayList<String>(); + ContainerLaunch.writeLaunchEnv(fos, env, resources, commands); + fos.flush(); + fos.close(); + + Shell.ShellCommandExecutor shexc + = new Shell.ShellCommandExecutor(new String[]{shellFile.getAbsolutePath()}, tmpDir); + boolean catchException = false; + String diagnostics = null; + try { + shexc.execute(); + } catch(ExitCodeException e){ + catchException = true; + diagnostics = e.getMessage(); + } + Assert.assertTrue(catchException); + Assert.assertNotNull(diagnostics); + Assert.assertTrue(shexc.getExitCode() != 0); + } + finally { + // cleanup + if (shellFile != null + && shellFile.exists()) { + shellFile.delete(); + } + } + } + /** * See if environment variable is forwarded using sanitizeEnv. * @throws Exception